diff --git a/CMakeLists.txt b/CMakeLists.txt index 7c7eb260aea8478f4833cb79253f4481e10b8685..e8ea828dd2a25f5f47b03e92ae86e083d4425dc9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,7 +39,7 @@ option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_F option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FOUND}) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) -option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON) +option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) option(WITH_STYLE_CHECK "Compile PaddlePaddle with style check" ON) option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 585db019d521b1699baadfae31ef95b5059c71b4..18770fe2861380ea1320aef5cb7ec3432147d7ce 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -224,12 +224,18 @@ function(cc_test TARGET_NAME) if(WITH_TESTING) set(options "") set(oneValueArgs "") - set(multiValueArgs SRCS DEPS) + set(multiValueArgs SRCS DEPS ARGS) cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_executable(${TARGET_NAME} ${cc_test_SRCS}) - target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags) + # Support linking flags: --whole-archive (Linux) / -force_load (MacOS) + target_circle_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags) + if("${cc_test_DEPS}" MATCHES "ARCHIVE_START") + list(REMOVE_ITEM cc_test_DEPS ARCHIVE_START ARCHIVE_END) + endif() add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main paddle_memory gtest gflags) - add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + add_test(NAME ${TARGET_NAME} + COMMAND ${TARGET_NAME} ${cc_test_ARGS} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() endfunction(cc_test) @@ -457,7 +463,7 @@ endfunction() function(py_test TARGET_NAME) if(WITH_TESTING) - set(options STATIC static SHARED shared) + set(options "") set(oneValueArgs "") set(multiValueArgs SRCS DEPS ARGS) cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) diff --git a/doc/api/v2/fluid/data_feeder.rst b/doc/api/v2/fluid/data_feeder.rst index 0fa78f7dfb04c13be7eb83b7fd35cb03f2f4a7fa..a591c7334fd31c98a94b50a4344f251560a0f2f9 100644 --- a/doc/api/v2/fluid/data_feeder.rst +++ b/doc/api/v2/fluid/data_feeder.rst @@ -1,9 +1,14 @@ +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + =========== -DataFeeder +data_feeder =========== DataFeeder ------------ -.. automodule:: paddle.v2.fluid.data_feeder - :members: DataFeeder +---------- + +.. autoclass:: paddle.v2.fluid.data_feeder.DataFeeder + :members: :noindex: + diff --git a/doc/api/v2/fluid/evaluator.rst b/doc/api/v2/fluid/evaluator.rst index a23f3301d0331e0ea3733f06444515eb4680cd31..00dcecfd628a35d83d1c596bf0aea819a1705862 100644 --- a/doc/api/v2/fluid/evaluator.rst +++ b/doc/api/v2/fluid/evaluator.rst @@ -1,9 +1,21 @@ -=========== -Evaluator -=========== - -Evaluator ------------ -.. automodule:: paddle.v2.fluid.evaluator - :members: Evaluator +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + +========= +evaluator +========= + +Accuracy +-------- + +.. autoclass:: paddle.v2.fluid.evaluator.Accuracy + :members: :noindex: + +ChunkEvaluator +-------------- + +.. autoclass:: paddle.v2.fluid.evaluator.ChunkEvaluator + :members: + :noindex: + diff --git a/doc/api/v2/fluid/executor.rst b/doc/api/v2/fluid/executor.rst index 3a283538c120cfa1ef646c390bb71c6251c23675..a028f6283f2ca333bdf6c9857a98661c0222b41e 100644 --- a/doc/api/v2/fluid/executor.rst +++ b/doc/api/v2/fluid/executor.rst @@ -1,9 +1,32 @@ -=========== -Executor -=========== +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + +======== +executor +======== Executor +-------- + +.. autoclass:: paddle.v2.fluid.executor.Executor + :members: + :noindex: + +global_scope +------------ + +.. autofunction:: paddle.v2.fluid.executor.global_scope + :noindex: + +scope_guard ----------- -.. automodule:: paddle.v2.fluid.executor - :members: Executor + +.. autofunction:: paddle.v2.fluid.executor.scope_guard + :noindex: + +switch_scope +------------ + +.. autofunction:: paddle.v2.fluid.executor.switch_scope :noindex: + diff --git a/doc/api/v2/fluid/gen_doc.py b/doc/api/v2/fluid/gen_doc.py new file mode 100644 index 0000000000000000000000000000000000000000..a2147fd3f7ea635d8f14210fbcd1a568ee2230ee --- /dev/null +++ b/doc/api/v2/fluid/gen_doc.py @@ -0,0 +1,109 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import argparse +import sys +import types + +import paddle.v2.fluid as fluid + + +def parse_arg(): + parser = argparse.ArgumentParser() + parser.add_argument('--submodules', nargs="*") + parser.add_argument( + 'module', type=str, help='Generate the documentation of which module') + return parser.parse_args() + + +class DocGenerator(object): + def __init__(self, module_name, stream=sys.stdout): + self.stream = stream + self.module_name = module_name + if not hasattr(fluid, module_name): + raise ValueError("Cannot find fluid.{0}".format(module_name)) + else: + self.module = getattr(fluid, module_name) + self.stream.write('''.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + +''') + + self._print_header_(module_name, dot='=', is_title=True) + + def print_submodule(self, submodule_name): + submodule = getattr(self.module, submodule_name) + if submodule is None: + raise ValueError("Cannot find submodule {0}".format(submodule_name)) + self.print_section(submodule_name) + + for item in submodule.__all__: + self.print_item(item) + + def print_current_module(self): + for item in self.module.__all__: + self.print_item(item) + + def print_section(self, name): + self._print_header_(name, dot='=', is_title=False) + + def print_item(self, name): + item = getattr(self.module, name) + if isinstance(item, types.TypeType): + self.print_class(name) + elif isinstance(item, types.FunctionType): + self.print_method(name) + else: + raise RuntimeError("Unsupported item {0}".format(name)) + + def print_class(self, name): + self._print_header_(name, dot='-', is_title=False) + self.stream.write('''.. autoclass:: paddle.v2.fluid.{0}.{1} + :members: + :noindex: + +'''.format(self.module_name, name)) + + def print_method(self, name): + self._print_header_(name, dot='-', is_title=False) + self.stream.write('''.. autofunction:: paddle.v2.fluid.{0}.{1} + :noindex: + +'''.format(self.module_name, name)) + + def _print_header_(self, name, dot, is_title): + dot_line = dot * len(name) + if is_title: + self.stream.write(dot_line) + self.stream.write('\n') + self.stream.write(name) + self.stream.write('\n') + self.stream.write(dot_line) + self.stream.write('\n') + self.stream.write('\n') + + +def main(): + args = parse_arg() + gen = DocGenerator(args.module) + if args.submodules is None: + gen.print_current_module() + else: + for submodule_name in args.submodules: + gen.print_submodule(submodule_name) + + +if __name__ == '__main__': + main() diff --git a/doc/api/v2/fluid/gen_doc.sh b/doc/api/v2/fluid/gen_doc.sh new file mode 100755 index 0000000000000000000000000000000000000000..ba7b7ba8e51399deb852b0a7c8ddd3128f521e85 --- /dev/null +++ b/doc/api/v2/fluid/gen_doc.sh @@ -0,0 +1,7 @@ +#!/bin/bash +python gen_doc.py layers --submodules control_flow device io nn ops tensor > layers.rst + +for module in io data_feeder evaluator executor initializer io nets optimizer param_attr profiler regularizer +do + python gen_doc.py ${module} > ${module}.rst +done diff --git a/doc/api/v2/fluid/initializer.rst b/doc/api/v2/fluid/initializer.rst index 8f587837e9873370722062404f511654a9460587..c38be033fff2997930525f51c93995db09daa2b6 100644 --- a/doc/api/v2/fluid/initializer.rst +++ b/doc/api/v2/fluid/initializer.rst @@ -1,50 +1,35 @@ +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + =========== -Initializer +initializer =========== +Constant +-------- - -Initializer ------------ -.. automodule:: paddle.v2.fluid.initializer - :members: Initializer - :noindex: - - - -ConstantInitializer -------------------- -.. automodule:: paddle.v2.fluid.initializer - :members: ConstantInitializer +.. autoclass:: paddle.v2.fluid.initializer.Constant + :members: :noindex: +Uniform +------- - -UniformInitializer ------------------- -.. automodule:: paddle.v2.fluid.initializer - :members: UniformInitializer - :noindex: - - - -NormalInitializer ------------------ -.. automodule:: paddle.v2.fluid.initializer - :members: NormalInitializer +.. autoclass:: paddle.v2.fluid.initializer.Uniform + :members: :noindex: +Normal +------ -XavierInitializer ------------------ -.. automodule:: paddle.v2.fluid.initializer - :members: XavierInitializer +.. autoclass:: paddle.v2.fluid.initializer.Normal + :members: :noindex: +Xavier +------ -MSRAInitializer ---------------- -.. automodule:: paddle.v2.fluid.initializer - :members: MSRAInitializer +.. autoclass:: paddle.v2.fluid.initializer.Xavier + :members: :noindex: diff --git a/doc/api/v2/fluid/io.rst b/doc/api/v2/fluid/io.rst index 67f68c4e9e16b379207b8de114cdf769e056f78e..37c9c273e369532e8ff596e9649cb695a98a2505 100644 --- a/doc/api/v2/fluid/io.rst +++ b/doc/api/v2/fluid/io.rst @@ -1,10 +1,61 @@ -=========== -IO -=========== +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! +== +io +== +save_vars +--------- -is_parameter +.. autofunction:: paddle.v2.fluid.io.save_vars + :noindex: + +save_params ----------- -.. autofunction:: paddle.v2.fluid.io.is_parameter + +.. autofunction:: paddle.v2.fluid.io.save_params + :noindex: + +save_persistables +----------------- + +.. autofunction:: paddle.v2.fluid.io.save_persistables + :noindex: + +load_vars +--------- + +.. autofunction:: paddle.v2.fluid.io.load_vars + :noindex: + +load_params +----------- + +.. autofunction:: paddle.v2.fluid.io.load_params :noindex: + +load_persistables +----------------- + +.. autofunction:: paddle.v2.fluid.io.load_persistables + :noindex: + +save_inference_model +-------------------- + +.. autofunction:: paddle.v2.fluid.io.save_inference_model + :noindex: + +load_inference_model +-------------------- + +.. autofunction:: paddle.v2.fluid.io.load_inference_model + :noindex: + +get_inference_program +--------------------- + +.. autofunction:: paddle.v2.fluid.io.get_inference_program + :noindex: + diff --git a/doc/api/v2/fluid/layers.rst b/doc/api/v2/fluid/layers.rst index 231ec2d4ba102a5d31c47cbc7a5d484ef17a7f3a..e24613b94b422b7cdf9c6383c359fa92a4faf6ff 100644 --- a/doc/api/v2/fluid/layers.rst +++ b/doc/api/v2/fluid/layers.rst @@ -1,546 +1,799 @@ -========== -Layers -========== +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! +====== +layers +====== -fc ---- -.. autofunction:: paddle.v2.fluid.layers.fc +control_flow +============ + +split_lod_tensor +---------------- + +.. autofunction:: paddle.v2.fluid.layers.split_lod_tensor :noindex: -embedding ---------- -.. autofunction:: paddle.v2.fluid.layers.embedding +merge_lod_tensor +---------------- + +.. autofunction:: paddle.v2.fluid.layers.merge_lod_tensor :noindex: -dynamic_lstm ------------- -.. autofunction:: paddle.v2.fluid.layers.dynamic_lstm +BlockGuard +---------- + +.. autoclass:: paddle.v2.fluid.layers.BlockGuard + :members: :noindex: -dynamic_lstmp -------------- -.. autofunction:: paddle.v2.fluid.layers.dynamic_lstmp +BlockGuardWithCompletion +------------------------ + +.. autoclass:: paddle.v2.fluid.layers.BlockGuardWithCompletion + :members: :noindex: -dynamic_gru ------------ -.. autofunction:: paddle.v2.fluid.layers.dynamic_gru +StaticRNNMemoryLink +------------------- + +.. autoclass:: paddle.v2.fluid.layers.StaticRNNMemoryLink + :members: :noindex: -data ----- -.. autofunction:: paddle.v2.fluid.layers.data +WhileGuard +---------- + +.. autoclass:: paddle.v2.fluid.layers.WhileGuard + :members: :noindex: -mean ----- -.. autofunction:: paddle.v2.fluid.layers.mean +While +----- + +.. autoclass:: paddle.v2.fluid.layers.While + :members: :noindex: -mul ---- -.. autofunction:: paddle.v2.fluid.layers.mul +lod_rank_table +-------------- + +.. autofunction:: paddle.v2.fluid.layers.lod_rank_table :noindex: -elementwise_add ---------------- -.. autofunction:: paddle.v2.fluid.layers.elementwise_add +max_sequence_len +---------------- + +.. autofunction:: paddle.v2.fluid.layers.max_sequence_len :noindex: -elementwise_sub ---------------- -.. autofunction:: paddle.v2.fluid.layers.elementwise_sub +topk +---- + +.. autofunction:: paddle.v2.fluid.layers.topk :noindex: -elementwise_mul ---------------- -.. autofunction:: paddle.v2.fluid.layers.elementwise_mul +lod_tensor_to_array +------------------- + +.. autofunction:: paddle.v2.fluid.layers.lod_tensor_to_array :noindex: -elementwise_div ---------------- -.. autofunction:: paddle.v2.fluid.layers.elementwise_div +array_to_lod_tensor +------------------- + +.. autofunction:: paddle.v2.fluid.layers.array_to_lod_tensor :noindex: +increment +--------- -dropout -------- -.. autofunction:: paddle.v2.fluid.layers.dropout +.. autofunction:: paddle.v2.fluid.layers.increment :noindex: +array_write +----------- -reshape --------- -.. autofunction:: paddle.v2.fluid.layers.reshape +.. autofunction:: paddle.v2.fluid.layers.array_write :noindex: +create_array +------------ -sigmoid +.. autofunction:: paddle.v2.fluid.layers.create_array + :noindex: + +less_than --------- -.. autofunction:: paddle.v2.fluid.layers.sigmoid + +.. autofunction:: paddle.v2.fluid.layers.less_than :noindex: +array_read +---------- -scale ---------- -.. autofunction:: paddle.v2.fluid.layers.scale +.. autofunction:: paddle.v2.fluid.layers.array_read + :noindex: + +shrink_memory +------------- + +.. autofunction:: paddle.v2.fluid.layers.shrink_memory :noindex: +array_length +------------ -transpose +.. autofunction:: paddle.v2.fluid.layers.array_length + :noindex: + +IfElse +------ + +.. autoclass:: paddle.v2.fluid.layers.IfElse + :members: + :noindex: + +DynamicRNN +---------- + +.. autoclass:: paddle.v2.fluid.layers.DynamicRNN + :members: + :noindex: + +ConditionalBlock +---------------- + +.. autoclass:: paddle.v2.fluid.layers.ConditionalBlock + :members: + :noindex: + +StaticRNN --------- -.. autofunction:: paddle.v2.fluid.layers.transpose + +.. autoclass:: paddle.v2.fluid.layers.StaticRNN + :members: :noindex: +reorder_lod_tensor_by_rank +-------------------------- -sigmoid_cross_entropy_with_logits ---------------------------------- -.. autofunction:: paddle.v2.fluid.layers.esigmoid_cross_entropy_with_logits +.. autofunction:: paddle.v2.fluid.layers.reorder_lod_tensor_by_rank :noindex: +ParallelDo +---------- -cast +.. autoclass:: paddle.v2.fluid.layers.ParallelDo + :members: + :noindex: + +Print +----- + +.. autofunction:: paddle.v2.fluid.layers.Print + :noindex: + +device +====== + +get_places +---------- + +.. autofunction:: paddle.v2.fluid.layers.get_places + :noindex: + +io +== + +data ---- -.. autofunction:: paddle.v2.fluid.layers.cast + +.. autofunction:: paddle.v2.fluid.layers.data :noindex: +BlockGuardServ +-------------- -concat -------- -.. autofunction:: paddle.v2.fluid.layers.concat +.. autoclass:: paddle.v2.fluid.layers.BlockGuardServ + :members: :noindex: +ListenAndServ +------------- -sums +.. autoclass:: paddle.v2.fluid.layers.ListenAndServ + :members: + :noindex: + +Send ---- -.. autofunction:: paddle.v2.fluid.layers.sums + +.. autofunction:: paddle.v2.fluid.layers.Send :noindex: +nn +== -linear_chain_crf ----------------- -.. autofunction:: paddle.v2.fluid.layers.linear_chain_crf +fc +-- + +.. autofunction:: paddle.v2.fluid.layers.fc :noindex: +embedding +--------- -assign -------- .. autofunction:: paddle.v2.fluid.layers.embedding :noindex: +dynamic_lstm +------------ -split_lod_tensor ----------------- -.. autofunction:: paddle.v2.fluid.layers.split_lod_tensor +.. autofunction:: paddle.v2.fluid.layers.dynamic_lstm :noindex: +dynamic_lstmp +------------- -merge_lod_tensor +.. autofunction:: paddle.v2.fluid.layers.dynamic_lstmp + :noindex: + +dynamic_gru +----------- + +.. autofunction:: paddle.v2.fluid.layers.dynamic_gru + :noindex: + +gru_unit +-------- + +.. autofunction:: paddle.v2.fluid.layers.gru_unit + :noindex: + +linear_chain_crf ---------------- -.. autofunction:: paddle.v2.fluid.layers.merge_lod_tensor + +.. autofunction:: paddle.v2.fluid.layers.linear_chain_crf + :noindex: + +crf_decoding +------------ + +.. autofunction:: paddle.v2.fluid.layers.crf_decoding :noindex: cos_sim --------- +------- + .. autofunction:: paddle.v2.fluid.layers.cos_sim :noindex: - cross_entropy ------------- + .. autofunction:: paddle.v2.fluid.layers.cross_entropy :noindex: - - square_error_cost ----------------- + .. autofunction:: paddle.v2.fluid.layers.square_error_cost :noindex: - accuracy ---------- +-------- + .. autofunction:: paddle.v2.fluid.layers.accuracy :noindex: +chunk_eval +---------- + +.. autofunction:: paddle.v2.fluid.layers.chunk_eval + :noindex: sequence_conv ------------- + .. autofunction:: paddle.v2.fluid.layers.sequence_conv :noindex: - conv2d ------ + .. autofunction:: paddle.v2.fluid.layers.conv2d :noindex: - sequence_pool ------------- + .. autofunction:: paddle.v2.fluid.layers.sequence_pool :noindex: +pool2d +------ -sequence_first_step -------------------- -.. autofunction:: paddle.v2.fluid.layers.sequence_first_step +.. autofunction:: paddle.v2.fluid.layers.pool2d :noindex: +batch_norm +---------- + +.. autofunction:: paddle.v2.fluid.layers.batch_norm + :noindex: -sequence_last_step +beam_search_decode ------------------ -.. autofunction:: paddle.v2.fluid.layers.sequence_last_step + +.. autofunction:: paddle.v2.fluid.layers.beam_search_decode :noindex: +conv2d_transpose +---------------- -pool2d ------- -.. autofunction:: paddle.v2.fluid.layers.pool2d +.. autofunction:: paddle.v2.fluid.layers.conv2d_transpose :noindex: +sequence_expand +--------------- -batch_norm +.. autofunction:: paddle.v2.fluid.layers.sequence_expand + :noindex: + +lstm_unit +--------- + +.. autofunction:: paddle.v2.fluid.layers.lstm_unit + :noindex: + +reduce_sum ---------- -.. autofunction:: paddle.v2.fluid.layers.batch_norm + +.. autofunction:: paddle.v2.fluid.layers.reduce_sum + :noindex: + +reduce_mean +----------- + +.. autofunction:: paddle.v2.fluid.layers.reduce_mean :noindex: +reduce_max +---------- + +.. autofunction:: paddle.v2.fluid.layers.reduce_max + :noindex: -beam_search_decode +reduce_min +---------- + +.. autofunction:: paddle.v2.fluid.layers.reduce_min + :noindex: + +sequence_first_step +------------------- + +.. autofunction:: paddle.v2.fluid.layers.sequence_first_step + :noindex: + +sequence_last_step ------------------ -.. autofunction:: paddle.v2.fluid.layers.beam_search_decode + +.. autofunction:: paddle.v2.fluid.layers.sequence_last_step + :noindex: + +dropout +------- + +.. autofunction:: paddle.v2.fluid.layers.dropout :noindex: +split +----- -lod_rank_table --------------- -.. autofunction:: paddle.v2.fluid.layers.lod_rank_table +.. autofunction:: paddle.v2.fluid.layers.split :noindex: +ctc_greedy_decoder +------------------ -max_sequence_len ----------------- -.. autofunction:: paddle.v2.fluid.layers.max_sequence_len +.. autofunction:: paddle.v2.fluid.layers.ctc_greedy_decoder :noindex: +edit_distance +------------- -topk ------ -.. autofunction:: paddle.v2.fluid.layers.topk +.. autofunction:: paddle.v2.fluid.layers.edit_distance :noindex: +l2_normalize +------------ -lod_tensor_to_array -------------------- -.. autofunction:: paddle.v2.fluid.layers.lod_tensor_to_array +.. autofunction:: paddle.v2.fluid.layers.l2_normalize :noindex: +matmul +------ - -array_to_lod_tensor -------------------- -.. autofunction:: paddle.v2.fluid.layers.array_to_lod_tensor +.. autofunction:: paddle.v2.fluid.layers.matmul :noindex: +warpctc +------- +.. autofunction:: paddle.v2.fluid.layers.warpctc + :noindex: +sequence_reshape +---------------- -fill_constant -------------- -.. autofunction:: paddle.v2.fluid.layers.fill_constant +.. autofunction:: paddle.v2.fluid.layers.sequence_reshape :noindex: +transpose +--------- +.. autofunction:: paddle.v2.fluid.layers.transpose + :noindex: -fill_constant_batch_size_like ------------------------------ -.. autofunction:: paddle.v2.fluid.layers.fill_constant_batch_size_like +im2sequence +----------- + +.. autofunction:: paddle.v2.fluid.layers.im2sequence :noindex: +nce +--- -ones ----- -.. autofunction:: paddle.v2.fluid.layers.ones +.. autofunction:: paddle.v2.fluid.layers.nce :noindex: +beam_search +----------- -zeros ------ -.. autofunction:: paddle.v2.fluid.layers.zeros +.. autofunction:: paddle.v2.fluid.layers.beam_search :noindex: +row_conv +-------- -increment ---------- -.. autofunction:: paddle.v2.fluid.layers.increment +.. autofunction:: paddle.v2.fluid.layers.row_conv :noindex: +multiplex +--------- -array_write ------------ -.. autofunction:: paddle.v2.fluid.layers.array_write +.. autofunction:: paddle.v2.fluid.layers.multiplex :noindex: +ops +=== +mean +---- -create_array ------------- -.. autofunction:: paddle.v2.fluid.layers.create_array +.. autofunction:: paddle.v2.fluid.layers.mean :noindex: +mul +--- -less_than ---------- -.. autofunction:: paddle.v2.fluid.layers.less_than +.. autofunction:: paddle.v2.fluid.layers.mul :noindex: +reshape +------- -array_read ----------- -.. autofunction:: paddle.v2.fluid.layers.array_read +.. autofunction:: paddle.v2.fluid.layers.reshape :noindex: +scale +----- -shrink_memory --------------- -.. autofunction:: paddle.v2.fluid.layers.shrink_memory +.. autofunction:: paddle.v2.fluid.layers.scale :noindex: +sigmoid_cross_entropy_with_logits +--------------------------------- -array_length -------------- -.. autofunction:: paddle.v2.fluid.layers.array_length +.. autofunction:: paddle.v2.fluid.layers.sigmoid_cross_entropy_with_logits :noindex: +elementwise_add +--------------- -conv2d_transpose ----------------- -.. autofunction:: paddle.v2.fluid.layers.conv2d_transpose +.. autofunction:: paddle.v2.fluid.layers.elementwise_add :noindex: - -sequence_expand +elementwise_div --------------- -.. autofunction:: paddle.v2.fluid.layers.sequence_expand + +.. autofunction:: paddle.v2.fluid.layers.elementwise_div :noindex: +elementwise_sub +--------------- -gru_unit --------- -.. autofunction:: paddle.v2.fluid.layers.gru_unit +.. autofunction:: paddle.v2.fluid.layers.elementwise_sub :noindex: +elementwise_mul +--------------- -lstm_unit ---------- -.. autofunction:: paddle.v2.fluid.layers.lstm_unit +.. autofunction:: paddle.v2.fluid.layers.elementwise_mul :noindex: +elementwise_max +--------------- -sequence_softmax ----------------- -.. autofunction:: paddle.v2.fluid.layers.sequence_softmax +.. autofunction:: paddle.v2.fluid.layers.elementwise_max :noindex: +elementwise_min +--------------- -reduce_sum ----------- -.. autofunction:: paddle.v2.fluid.layers.reduce_sum +.. autofunction:: paddle.v2.fluid.layers.elementwise_min :noindex: +elementwise_pow +--------------- -reduce_mean ------------ -.. autofunction:: paddle.v2.fluid.layers.reduce_mean +.. autofunction:: paddle.v2.fluid.layers.elementwise_pow :noindex: +clip +---- -reduce_max ----------- -.. autofunction:: paddle.v2.fluid.layers.reduce_max +.. autofunction:: paddle.v2.fluid.layers.clip :noindex: +clip_by_norm +------------ -reduce_min ----------- -.. autofunction:: paddle.v2.fluid.layers.reduce_min +.. autofunction:: paddle.v2.fluid.layers.clip_by_norm :noindex: +sequence_softmax +---------------- -split ------ -.. autofunction:: paddle.v2.fluid.layers.split +.. autofunction:: paddle.v2.fluid.layers.sequence_softmax :noindex: +sigmoid +------- -matmul ------- -.. autofunction:: paddle.v2.fluid.layers.matmul +.. autofunction:: paddle.v2.fluid.layers.sigmoid :noindex: logsigmoid ---------- + .. autofunction:: paddle.v2.fluid.layers.logsigmoid :noindex: exp --- + .. autofunction:: paddle.v2.fluid.layers.exp :noindex: relu ---- + .. autofunction:: paddle.v2.fluid.layers.relu :noindex: tanh ---- + .. autofunction:: paddle.v2.fluid.layers.tanh :noindex: tanh_shrink ----------- + .. autofunction:: paddle.v2.fluid.layers.tanh_shrink :noindex: softshrink ---------- + .. autofunction:: paddle.v2.fluid.layers.softshrink :noindex: sqrt ---- + .. autofunction:: paddle.v2.fluid.layers.sqrt :noindex: abs ----- +--- + .. autofunction:: paddle.v2.fluid.layers.abs :noindex: ceil ---- + .. autofunction:: paddle.v2.fluid.layers.ceil :noindex: floor ----- + .. autofunction:: paddle.v2.fluid.layers.floor :noindex: round ----- + .. autofunction:: paddle.v2.fluid.layers.round :noindex: reciprocal ---------- + .. autofunction:: paddle.v2.fluid.layers.reciprocal :noindex: log --- + .. autofunction:: paddle.v2.fluid.layers.log :noindex: square ------ + .. autofunction:: paddle.v2.fluid.layers.square :noindex: softplus -------- + .. autofunction:: paddle.v2.fluid.layers.softplus :noindex: softsign ---------- +-------- + .. autofunction:: paddle.v2.fluid.layers.softsign :noindex: brelu ----- + .. autofunction:: paddle.v2.fluid.layers.brelu :noindex: leaky_relu ---------- + .. autofunction:: paddle.v2.fluid.layers.leaky_relu :noindex: soft_relu --------- + .. autofunction:: paddle.v2.fluid.layers.soft_relu :noindex: elu ----- +--- + .. autofunction:: paddle.v2.fluid.layers.elu :noindex: relu6 ----- + .. autofunction:: paddle.v2.fluid.layers.relu6 :noindex: pow ----- +--- + .. autofunction:: paddle.v2.fluid.layers.pow :noindex: +stanh +----- + +.. autofunction:: paddle.v2.fluid.layers.stanh + :noindex: + hard_shrink ----------- + .. autofunction:: paddle.v2.fluid.layers.hard_shrink :noindex: thresholded_relu ---------------- + .. autofunction:: paddle.v2.fluid.layers.thresholded_relu :noindex: hard_sigmoid -------------- +------------ + .. autofunction:: paddle.v2.fluid.layers.hard_sigmoid :noindex: swish ------- +----- + .. autofunction:: paddle.v2.fluid.layers.swish :noindex: -im2sequence +tensor +====== + +create_tensor +------------- + +.. autofunction:: paddle.v2.fluid.layers.create_tensor + :noindex: + +create_parameter +---------------- + +.. autofunction:: paddle.v2.fluid.layers.create_parameter + :noindex: + +create_global_var +----------------- + +.. autofunction:: paddle.v2.fluid.layers.create_global_var + :noindex: + +cast +---- + +.. autofunction:: paddle.v2.fluid.layers.cast + :noindex: + +concat ------ -.. autofunction:: paddle.v2.fluid.layers.im2sequence + +.. autofunction:: paddle.v2.fluid.layers.concat :noindex: -edit_distance ---------------- -.. autofunction:: paddle.v2.fluid.layers.edit_distance_error +sums +---- + +.. autofunction:: paddle.v2.fluid.layers.sums :noindex: -ctc_greedy_decoder ---------------- -.. autofunction:: paddle.v2.fluid.layers.ctc_greedy_decoder +assign +------ + +.. autofunction:: paddle.v2.fluid.layers.assign :noindex: -l2_normalize ------------- -.. autofunction:: paddle.v2.fluid.layers.l2_normalize +fill_constant_batch_size_like +----------------------------- + +.. autofunction:: paddle.v2.fluid.layers.fill_constant_batch_size_like :noindex: -sequence_reshape ----------------- -.. autofunction:: paddle.v2.fluid.layers.sequence_reshape +fill_constant +------------- + +.. autofunction:: paddle.v2.fluid.layers.fill_constant :noindex: -row_conv --------- -.. autofunction:: paddle.v2.fluid.layers.row_conv +ones +---- + +.. autofunction:: paddle.v2.fluid.layers.ones :noindex: -multiplex ---------- -.. autofunction:: paddle.v2.fluid.layers.multiplex +zeros +----- + +.. autofunction:: paddle.v2.fluid.layers.zeros :noindex: + diff --git a/doc/api/v2/fluid/nets.rst b/doc/api/v2/fluid/nets.rst index 500019bc507f859c4c91de5d322a82eb1e78e2de..015581b7660848bdb0845fafe2d3fc05405e6ae6 100644 --- a/doc/api/v2/fluid/nets.rst +++ b/doc/api/v2/fluid/nets.rst @@ -1,33 +1,31 @@ -=========== -Nets -=========== +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + +==== +nets +==== simple_img_conv_pool -------------------- -.. autofunction:: paddle.v2.fluid.nets.simple_img_conv_pool - :noindex: - -img_conv_group ---------------- -.. autofunction:: paddle.v2.fluid.nets.img_conv_group +.. autofunction:: paddle.v2.fluid.nets.simple_img_conv_pool :noindex: - sequence_conv_pool ------------------ + .. autofunction:: paddle.v2.fluid.nets.sequence_conv_pool :noindex: - glu --- + .. autofunction:: paddle.v2.fluid.nets.glu :noindex: - scaled_dot_product_attention ---------------------------- + .. autofunction:: paddle.v2.fluid.nets.scaled_dot_product_attention :noindex: diff --git a/doc/api/v2/fluid/optimizer.rst b/doc/api/v2/fluid/optimizer.rst index 19b4940f08de3e2f7dc177f2961e538946d10a78..1691ebb9a7cb16da96e04147d0adea322374f529 100644 --- a/doc/api/v2/fluid/optimizer.rst +++ b/doc/api/v2/fluid/optimizer.rst @@ -1,54 +1,49 @@ -=========== -Optimizer -=========== - -Optimizer ------------ -.. automodule:: paddle.v2.fluid.optimizer - :members: Optimizer - :noindex: +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! +========= +optimizer +========= -SGDOptimizer ------------ -.. automodule:: paddle.v2.fluid.optimizer - :members: SGDOptimizer - :noindex: +SGD +--- +.. autoclass:: paddle.v2.fluid.optimizer.SGD + :members: + :noindex: +Momentum +-------- -MomentumOptimizer ------------------ -.. automodule:: paddle.v2.fluid.optimizer - :members: MomentumOptimizer +.. autoclass:: paddle.v2.fluid.optimizer.Momentum + :members: :noindex: +Adagrad +------- - -AdagradOptimizer ----------------- -.. automodule:: paddle.v2.fluid.optimizer - :members: AdagradOptimizer +.. autoclass:: paddle.v2.fluid.optimizer.Adagrad + :members: :noindex: +Adam +---- -AdamOptimizer -------------- -.. automodule:: paddle.v2.fluid.optimizer - :members: AdamOptimizer +.. autoclass:: paddle.v2.fluid.optimizer.Adam + :members: :noindex: +Adamax +------ -AdamaxOptimizer ------------ -.. automodule:: paddle.v2.fluid.optimizer - :members: AdamaxOptimizer +.. autoclass:: paddle.v2.fluid.optimizer.Adamax + :members: :noindex: +DecayedAdagrad +-------------- -DecayedAdagradOptimizer ------------------------ -.. automodule:: paddle.v2.fluid.optimizer - :members: DecayedAdagradOptimizer +.. autoclass:: paddle.v2.fluid.optimizer.DecayedAdagrad + :members: :noindex: diff --git a/doc/api/v2/fluid/param_attr.rst b/doc/api/v2/fluid/param_attr.rst index ca0c8af9e8c4f2271de7a131ad0d27c0e8635f50..8083d0d858dafcd275eaddb9b475875ee42ef724 100644 --- a/doc/api/v2/fluid/param_attr.rst +++ b/doc/api/v2/fluid/param_attr.rst @@ -1,11 +1,21 @@ -=========== +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + +========== +param_attr +========== + ParamAttr -=========== +--------- +.. autoclass:: paddle.v2.fluid.param_attr.ParamAttr + :members: + :noindex: +WeightNormParamAttr +------------------- -ParamAttr ------------ -.. automodule:: paddle.v2.fluid.param_attr - :members: ParamAttr +.. autoclass:: paddle.v2.fluid.param_attr.WeightNormParamAttr + :members: :noindex: + diff --git a/doc/api/v2/fluid/profiler.rst b/doc/api/v2/fluid/profiler.rst index 7d4042d1f41c12c4a551ba6576559d612116872a..4a1ff7cb6976e0054f77428b699ea679aa91394f 100644 --- a/doc/api/v2/fluid/profiler.rst +++ b/doc/api/v2/fluid/profiler.rst @@ -1,10 +1,25 @@ -=========== -Profiler -=========== +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! +======== +profiler +======== +cuda_profiler +------------- -Profiler ------------ .. autofunction:: paddle.v2.fluid.profiler.cuda_profiler :noindex: + +reset_profiler +-------------- + +.. autofunction:: paddle.v2.fluid.profiler.reset_profiler + :noindex: + +profiler +-------- + +.. autofunction:: paddle.v2.fluid.profiler.profiler + :noindex: + diff --git a/doc/api/v2/fluid/regularizer.rst b/doc/api/v2/fluid/regularizer.rst index 868e225ed3d59e79aeb217fb88081ea25f80fa2c..2c17d15599baa1d02eb87c7b6c40034769ebb3a4 100644 --- a/doc/api/v2/fluid/regularizer.rst +++ b/doc/api/v2/fluid/regularizer.rst @@ -1,25 +1,27 @@ +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + =========== -Regularizer +regularizer =========== -WeightDecayRegularizer ----------------------- -.. automodule:: paddle.v2.fluid.regularizer - :members: WeightDecayRegularizer - :noindex: - +append_regularization_ops +------------------------- -L2DecayRegularizer ------------------- -.. automodule:: paddle.v2.fluid.regularizer - :members: L2DecayRegularizer +.. autofunction:: paddle.v2.fluid.regularizer.append_regularization_ops :noindex: +L1Decay +------- +.. autoclass:: paddle.v2.fluid.regularizer.L1Decay + :members: + :noindex: -L1DecayRegularizer -------------------- -.. automodule:: paddle.v2.fluid.regularizer - :members: L1DecayRegularizer +L2Decay +------- +.. autoclass:: paddle.v2.fluid.regularizer.L2Decay + :members: + :noindex: diff --git a/doc/design/speech/README.MD b/doc/design/speech/deep_speech_2.md similarity index 85% rename from doc/design/speech/README.MD rename to doc/design/speech/deep_speech_2.md index 7304650e628dba210488cd2dc4836318b5383b2a..cfdc4d6df04344c70d3334626bd38eca997c31ff 100644 --- a/doc/design/speech/README.MD +++ b/doc/design/speech/deep_speech_2.md @@ -140,7 +140,19 @@ TODO by Assignees ### Beam Search with CTC and LM -TODO by Assignees +
+
+Figure 2. Algorithm for CTC Beam Search Decoder. +
+ +- The **Beam Search Decoder** for DS2 CTC-trained network follows the similar approach in \[[3](#references)\] as shown in Figure 2, with two important modifications for the ambiguous parts: + - 1) in the iterative computation of probabilities, the assignment operation is changed to accumulation for one prefix may comes from different paths; + - 2) the if condition ```if l^+ not in A_prev then``` after probabilities' computation is deprecated for it is hard to understand and seems unnecessary. +- An **external scorer** would be passed into the decoder to evaluate a candidate prefix during decoding whenever a white space appended in English decoding and any character appended in Mandarin decoding. +- Such external scorer consists of language model, word count or any other custom scorers. +- The **language model** is built from Task 5, with parameters should be carefully tuned to achieve minimum WER/CER (c.f. Task 7) +- This decoder needs to perform with **high efficiency** for the convenience of parameters tuning and speech recognition in reality. + ## Future Work @@ -153,3 +165,4 @@ TODO by Assignees 1. Dario Amodei, etc., [Deep Speech 2 : End-to-End Speech Recognition in English and Mandarin](http://proceedings.mlr.press/v48/amodei16.pdf). ICML 2016. 2. Dario Amodei, etc., [Deep Speech 2 : End-to-End Speech Recognition in English and Mandarin](https://arxiv.org/abs/1512.02595). arXiv:1512.02595. +3. Awni Y. Hannun, etc. [First-Pass Large Vocabulary Continuous Speech Recognition using Bi-Directional Recurrent DNNs](https://arxiv.org/abs/1408.2873). arXiv:1408.2873 diff --git a/doc/design/speech/image/beam_search.png b/doc/design/speech/image/beam_search.png new file mode 100644 index 0000000000000000000000000000000000000000..7f7e35f34223162d0f7f0ed97375909c43b830ae Binary files /dev/null and b/doc/design/speech/image/beam_search.png differ diff --git a/doc/getstarted/build_and_install/build_from_source_cn.rst b/doc/getstarted/build_and_install/build_from_source_cn.rst index 71904dc41ed0d946867d890cc585e1b88450ca8c..ff904b1022a41612c9680dce92d3fc2c69ad7e93 100644 --- a/doc/getstarted/build_and_install/build_from_source_cn.rst +++ b/doc/getstarted/build_and_install/build_from_source_cn.rst @@ -115,7 +115,7 @@ PaddlePaddle的编译选项,包括生成CPU/GPU二进制文件、链接何种B "WITH_AVX", "是否编译含有AVX指令集的PaddlePaddle二进制文件", "ON" "WITH_PYTHON", "是否内嵌PYTHON解释器", "ON" "WITH_STYLE_CHECK", "是否编译时进行代码风格检查", "ON" - "WITH_TESTING", "是否开启单元测试", "ON" + "WITH_TESTING", "是否开启单元测试", "OFF" "WITH_DOC", "是否编译中英文文档", "OFF" "WITH_SWIG_PY", "是否编译PYTHON的SWIG接口,该接口可用于预测和定制化训练", "Auto" "WITH_GOLANG", "是否编译go语言的可容错parameter server", "ON" diff --git a/doc/getstarted/build_and_install/build_from_source_en.rst b/doc/getstarted/build_and_install/build_from_source_en.rst index 27f73b2e2c029b41d514e1612912ed1c335605b6..718fb869c23a1f7be82c87c726282bded9dad516 100644 --- a/doc/getstarted/build_and_install/build_from_source_en.rst +++ b/doc/getstarted/build_and_install/build_from_source_en.rst @@ -126,7 +126,7 @@ You can add :code:`-D` argument to pass such options, like: "WITH_AVX", "Build with AVX support", "ON" "WITH_PYTHON", "Build with integrated Python interpreter", "ON" "WITH_STYLE_CHECK", "Check code style when building", "ON" - "WITH_TESTING", "Build unit tests", "ON" + "WITH_TESTING", "Build unit tests", "OFF" "WITH_DOC", "Build documentations", "OFF" "WITH_SWIG_PY", "Build Python SWIG interface for V2 API", "Auto" "WITH_GOLANG", "Build fault-tolerant parameter server written in go", "ON" diff --git a/doc/getstarted/build_and_install/docker_install_cn.rst b/doc/getstarted/build_and_install/docker_install_cn.rst index 98fada7bdb46f4dd2927d6f93bcbcebbe7d18604..79d214635a069a739060e0b79424729f6ff90387 100644 --- a/doc/getstarted/build_and_install/docker_install_cn.rst +++ b/doc/getstarted/build_and_install/docker_install_cn.rst @@ -95,6 +95,12 @@ PaddlePaddle Book是为用户和开发者制作的一个交互式的Jupyter Note docker run -p 8888:8888 paddlepaddle/book +国内用户可以使用下面的镜像源来加速访问: + + .. code-block: bash + + docker run -p 8888:8888 docker.paddlepaddlehub.com/book + 然后在浏览器中输入以下网址: .. code-block:: text diff --git a/doc/getstarted/build_and_install/docker_install_en.rst b/doc/getstarted/build_and_install/docker_install_en.rst index b1d0890b4cdddb77114a80276130afd07c22d270..e0e0559fb858a093db96a9b4ec1c5a45d6c71a38 100644 --- a/doc/getstarted/build_and_install/docker_install_en.rst +++ b/doc/getstarted/build_and_install/docker_install_en.rst @@ -102,6 +102,12 @@ We provide a packaged book image, simply issue the command: docker run -p 8888:8888 paddlepaddle/book +For users in China, we provide a faster mirror: + + .. code-block: bash + + docker run -p 8888:8888 docker.paddlepaddlehub.com/book + Then, you would back and paste the address into the local browser: .. code-block:: text diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 318661af8bd04880577222fdc82cc1b6e79a457f..8b71f73c36c33d882b34c833031c50cd14817e76 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -22,11 +22,11 @@ cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto) cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor paddle_memory) -nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor) +nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor init) cc_test(variable_test SRCS variable_test.cc) -cc_library(threadpool SRCS threadpool.cc) +cc_library(threadpool SRCS threadpool.cc DEPS enforce) cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool) cc_library(scope SRCS scope.cc DEPS glog threadpool) diff --git a/paddle/framework/channel.h b/paddle/framework/channel.h index 70ecccc1a1078374f3190b3956103ed8000c4fc5..0570980c5a4d7fa45e672ae5baac65d2c65ddad9 100644 --- a/paddle/framework/channel.h +++ b/paddle/framework/channel.h @@ -26,9 +26,7 @@ class Channel { virtual void Send(T*) = 0; virtual void Receive(T*) = 0; virtual size_t Cap() = 0; - - // Don't delete channels; instead, call Channel::Close. - protected: + virtual void Close() = 0; virtual ~Channel() {} }; @@ -50,11 +48,7 @@ Channel* MakeChannel(size_t buffer_size) { template void CloseChannel(Channel* ch) { - if (ch->Cap() > 0) { - delete dynamic_cast*>(ch); - } else { - delete dynamic_cast*>(ch); - } + ch->Close(); } } // namespace framework diff --git a/paddle/framework/channel_test.cc b/paddle/framework/channel_test.cc index 9efc0172658c800d14102531332dbef68fa392f4..1510fb8abf54f05804bd404d9bd00ecc42fbef63 100644 --- a/paddle/framework/channel_test.cc +++ b/paddle/framework/channel_test.cc @@ -14,13 +14,67 @@ limitations under the License. */ #include "paddle/framework/channel.h" +#include +#include + #include "gtest/gtest.h" +using paddle::framework::Channel; +using paddle::framework::MakeChannel; +using paddle::framework::CloseChannel; + TEST(Channel, MakeAndClose) { - using paddle::framework::Channel; - using paddle::framework::MakeChannel; - using paddle::framework::CloseChannel; + using paddle::framework::details::Buffered; + using paddle::framework::details::UnBuffered; + { + // MakeChannel should return a buffered channel is buffer_size > 0. + auto ch = MakeChannel(10); + EXPECT_NE(dynamic_cast*>(ch), nullptr); + EXPECT_EQ(dynamic_cast*>(ch), nullptr); + CloseChannel(ch); + delete ch; + } + { + // MakeChannel should return an un-buffered channel is buffer_size = 0. + auto ch = MakeChannel(0); + EXPECT_EQ(dynamic_cast*>(ch), nullptr); + EXPECT_NE(dynamic_cast*>(ch), nullptr); + CloseChannel(ch); + delete ch; + } +} + +TEST(Channel, SufficientBufferSizeDoesntBlock) { + const size_t buffer_size = 10; + auto ch = MakeChannel(buffer_size); + for (size_t i = 0; i < buffer_size; ++i) { + ch->Send(&i); // should not block + } + + size_t out; + for (size_t i = 0; i < buffer_size; ++i) { + ch->Receive(&out); // should not block + EXPECT_EQ(out, i); + } + CloseChannel(ch); + delete ch; +} + +TEST(Channel, ConcurrentSendNonConcurrentReceiveWithSufficientBufferSize) { + const size_t buffer_size = 10; + auto ch = MakeChannel(buffer_size); + size_t sum = 0; + std::thread t([&]() { + // Try to write more than buffer size. + for (size_t i = 0; i < 2 * buffer_size; ++i) { + ch->Send(&i); // should not block + sum += i; + } + }); + std::this_thread::sleep_for(std::chrono::milliseconds(100)); // wait 0.5 sec + EXPECT_EQ(sum, 45U); - Channel* ch = MakeChannel(10); CloseChannel(ch); + t.join(); + delete ch; } diff --git a/paddle/framework/details/buffered_channel.h b/paddle/framework/details/buffered_channel.h index 572e29d44a3baec84a029d87f9b0874784aa761b..b093e1589293b030ef2bedb82504a8e86b3dc857 100644 --- a/paddle/framework/details/buffered_channel.h +++ b/paddle/framework/details/buffered_channel.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include "paddle/framework/channel.h" +#include "paddle/platform/enforce.h" namespace paddle { namespace framework { @@ -32,6 +33,8 @@ class Buffered : public paddle::framework::Channel { virtual void Send(T*); virtual void Receive(T*); virtual size_t Cap() { return cap_; } + virtual void Close(); + virtual ~Buffered(); private: size_t cap_; @@ -39,9 +42,11 @@ class Buffered : public paddle::framework::Channel { std::condition_variable empty_cond_var_; std::condition_variable full_cond_var_; std::deque channel_; + bool closed_; - Buffered(size_t cap) : cap_(cap) {} - virtual ~Buffered(); + Buffered(size_t cap) : cap_(cap), closed_(false) { + PADDLE_ENFORCE_GT(cap, 0); + } void NotifyAllSenders(std::unique_lock*); }; @@ -49,24 +54,39 @@ class Buffered : public paddle::framework::Channel { template void Buffered::Send(T* item) { std::unique_lock lock(mu_); - full_cond_var_.wait(lock, [this]() { return channel_.size() < cap_; }); - channel_.push_back(std::move(*item)); - lock.unlock(); - empty_cond_var_.notify_one(); + full_cond_var_.wait(lock, + [this]() { return channel_.size() < cap_ || closed_; }); + if (!closed_) { + channel_.push_back(std::move(*item)); + lock.unlock(); + empty_cond_var_.notify_one(); + } } template void Buffered::Receive(T* item) { std::unique_lock lock(mu_); - empty_cond_var_.wait(lock, [this]() { return !channel_.empty(); }); - *item = std::move(channel_.front()); - channel_.pop_front(); + empty_cond_var_.wait(lock, [this]() { return !channel_.empty() || closed_; }); + if (!closed_) { + *item = std::move(channel_.front()); + channel_.pop_front(); + NotifyAllSenders(&lock); + } else { + item = nullptr; + } +} + +template +void Buffered::Close() { + std::unique_lock lock(mu_); + closed_ = true; NotifyAllSenders(&lock); } template Buffered::~Buffered() { std::unique_lock lock(mu_); + closed_ = true; channel_.clear(); NotifyAllSenders(&lock); } @@ -74,7 +94,7 @@ Buffered::~Buffered() { template void Buffered::NotifyAllSenders(std::unique_lock* lock) { lock->unlock(); - full_cond_var_.notify_one(); + full_cond_var_.notify_all(); } } // namespace details diff --git a/paddle/framework/details/unbuffered_channel.h b/paddle/framework/details/unbuffered_channel.h index 7ecced1fba88fea781fc342091bc71e5aa496d3a..cc2d2e587eca981307d4e522bd569fbffa450207 100644 --- a/paddle/framework/details/unbuffered_channel.h +++ b/paddle/framework/details/unbuffered_channel.h @@ -32,10 +32,11 @@ class UnBuffered : public paddle::framework::Channel { virtual void Send(T*); virtual void Receive(T*); virtual size_t Cap() { return 0; } + virtual void Close(); + virtual ~UnBuffered(); private: UnBuffered() {} - virtual ~UnBuffered(); }; template @@ -44,6 +45,9 @@ void UnBuffered::Send(T* channel_element) {} template void UnBuffered::Receive(T*) {} +template +void UnBuffered::Close() {} + template UnBuffered::~UnBuffered() {} diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc index cbf3ec75265fa74aaacffee684b7b7d5f73b7c02..9a232b08434d299d10bb2acdb6e96295de875d56 100644 --- a/paddle/framework/executor.cc +++ b/paddle/framework/executor.cc @@ -25,7 +25,7 @@ limitations under the License. */ #include "paddle/platform/place.h" #include "paddle/platform/profiler.h" -DECLARE_bool(do_memory_benchmark); +DECLARE_bool(benchmark); DEFINE_bool(check_nan_inf, false, "Checking whether operator produce NAN/INF or not. It will be " "extremely slow so please use this flag wisely."); @@ -33,9 +33,6 @@ DEFINE_bool(check_nan_inf, false, namespace paddle { namespace framework { -const std::string kFeedOpType = "feed"; -const std::string kFetchOpType = "fetch"; - Executor::Executor(const platform::Place& place) : place_(place) {} static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) { @@ -125,7 +122,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, op->Run(*local_scope, place_); VLOG(3) << op->DebugStringEx(local_scope); - if (FLAGS_do_memory_benchmark) { + if (FLAGS_benchmark) { VLOG(2) << "Memory used after operator " + op->Type() + " running: " << memory::memory_usage(place_); } @@ -142,7 +139,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, if (create_vars && create_local_scope) { scope->DeleteScope(local_scope); } - if (FLAGS_do_memory_benchmark) { + if (FLAGS_benchmark) { VLOG(2) << "-------------------------------------------------------"; VLOG(2) << "Memory used after deleting local scope: " << memory::memory_usage(place_); diff --git a/paddle/framework/feed_fetch_type.h b/paddle/framework/feed_fetch_type.h index 9bc4a90c44828ecb7458d524f59609f01848cc5c..168f456675af508df86dd0520cdeb5d16d94ad31 100644 --- a/paddle/framework/feed_fetch_type.h +++ b/paddle/framework/feed_fetch_type.h @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include #include #include "paddle/framework/lod_tensor.h" @@ -20,5 +21,8 @@ namespace paddle { namespace framework { using FeedFetchType = LoDTensor; using FeedFetchList = std::vector; + +static const std::string kFeedOpType = "feed"; +static const std::string kFetchOpType = "fetch"; } // namespace framework } // namespace paddle diff --git a/paddle/framework/init.cc b/paddle/framework/init.cc index 4ef82a541efaa35bcf831d5122570154f2fa2423..3f6ea121b3994979d89a7d5a8c20c59240a0c111 100644 --- a/paddle/framework/init.cc +++ b/paddle/framework/init.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include // for strdup #include +#include #include #include "paddle/framework/init.h" @@ -46,17 +47,23 @@ void InitDevices() { std::vector places; places.emplace_back(platform::CPUPlace()); + int count = 0; #ifdef PADDLE_WITH_CUDA - int count = platform::GetCUDADeviceCount(); - for (int i = 0; i < count; ++i) { - places.emplace_back(platform::CUDAPlace(i)); + try { + count = platform::GetCUDADeviceCount(); + } catch (const std::exception &exp) { + LOG(WARNING) << "Compiled with WITH_GPU, but no GPU found in runtime."; } #else LOG(WARNING) - << "'GPU' is not supported, Please re-compile with WITH_GPU option"; + << "'CUDA' is not supported, Please re-compile with WITH_GPU option"; #endif + for (int i = 0; i < count; ++i) { + places.emplace_back(platform::CUDAPlace(i)); + } + platform::DeviceContextPool::Init(places); } diff --git a/paddle/framework/init_test.cc b/paddle/framework/init_test.cc index f837a965d3be7d40c20803ae4462b3bfd91bffd0..01e076dd8ea24831e3ed7c8a7f8fae6818a89335 100644 --- a/paddle/framework/init_test.cc +++ b/paddle/framework/init_test.cc @@ -20,7 +20,21 @@ TEST(InitDevices, CPU) { using paddle::framework::InitDevices; using paddle::platform::DeviceContextPool; +#ifndef PADDLE_WITH_CUDA InitDevices(); DeviceContextPool& pool = DeviceContextPool::Instance(); - ASSERT_GE(pool.size(), 1U); + ASSERT_EQ(pool.size(), 1U); +#endif +} + +TEST(InitDevices, CUDA) { + using paddle::framework::InitDevices; + using paddle::platform::DeviceContextPool; + +#ifdef PADDLE_WITH_CUDA + int count = paddle::platform::GetCUDADeviceCount(); + InitDevices(); + DeviceContextPool& pool = DeviceContextPool::Instance(); + ASSERT_EQ(pool.size(), 1U + static_cast(count)); +#endif } diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index 53b0d0fe083579da4f0bb600f292765aa2aa0d8a..cb27de6991674247e6215ce64a2da5000fa78ed4 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -24,8 +24,6 @@ limitations under the License. */ #include #include -#include - namespace paddle { namespace framework { diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index 9d1294fdeb9bd76bf944f7ec3687e3c5bb333241..d0ab640485baf6d76ee629ea420b603f42b031b4 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -18,11 +18,11 @@ limitations under the License. */ #ifdef PADDLE_WITH_CUDA #include #include -#include #endif #include #include "paddle/framework/ddim.h" +#include "paddle/framework/mixed_vector.h" #include "paddle/framework/tensor.h" #include "paddle/framework/tensor_util.h" #include "paddle/platform/enforce.h" @@ -31,15 +31,6 @@ limitations under the License. */ namespace paddle { namespace framework { -#ifndef PADDLE_WITH_CUDA -template -using Vector = std::vector; -#else -template -using Vector = thrust::host_vector< - T, thrust::system::cuda::experimental::pinned_allocator>; -#endif - /* * LoD is short for Level of Details. * @@ -55,7 +46,15 @@ using Vector = thrust::host_vector< * 0 2 4 7 * 0 2 5 7 10 12 15 20 */ -using LoD = std::vector>; +struct LoD : public std::vector> { + using std::vector>::vector; + + void CopyFromCUDA() { + for (auto it = this->begin(); it != this->end(); ++it) { + it->CopyFromCUDA(); + } + } +}; std::ostream& operator<<(std::ostream& os, const LoD& lod); std::ostream& operator<<(std::ostream& os, const LoDTensor& t); @@ -109,7 +108,10 @@ bool CheckAbsLoD(const LoD& in, int tensor_height = -1); */ class LoDTensor : public Tensor { public: - LoDTensor() {} + LoDTensor() : Tensor() {} + + /* Constructor with place should only be used in pybind */ + explicit LoDTensor(const platform::Place& place) : Tensor(place) {} explicit LoDTensor(const LoD& lod) : lod_(lod) {} diff --git a/paddle/framework/lod_tensor_test.cc b/paddle/framework/lod_tensor_test.cc index 4d172c43c7cceacb7d0dfaf1c4d3028717350268..3b63020e685436396071fa05cd7697630ae56c95 100644 --- a/paddle/framework/lod_tensor_test.cc +++ b/paddle/framework/lod_tensor_test.cc @@ -23,6 +23,17 @@ namespace paddle { namespace framework { +TEST(LoD, data) { + LoD lod{{0, 1, 2}}; + lod.push_back({0, 2, 4, 5}); + lod.push_back(std::vector({0, 1, 6, 8, 10, 11})); + + auto& v = lod[0]; + for (size_t i = 0; i < v.size(); ++i) { + EXPECT_EQ(v[i], i); + } +} + TEST(LodExpand, test) { LoD lod{{0, 2}}; LoDTensor tensor; diff --git a/paddle/framework/lod_tensor_test.cu b/paddle/framework/lod_tensor_test.cu index 1e253a2f6f35e827fb2e5db6270da03705b39514..d4c9f00bd9c00f3cae68858ca46c5320fc117405 100644 --- a/paddle/framework/lod_tensor_test.cu +++ b/paddle/framework/lod_tensor_test.cu @@ -14,6 +14,8 @@ #include #include +#include +#include "paddle/framework/init.h" #include "paddle/framework/lod_tensor.h" #include "paddle/platform/assert.h" @@ -26,7 +28,48 @@ __global__ void test(size_t* a, int size) { } } +TEST(Vector, Normal) { + using namespace paddle::framework; + using namespace paddle::platform; + using namespace paddle::memory; + + paddle::framework::InitDevices(); + + paddle::framework::Vector vec({1, 2, 3}); + size_t* ptr = vec.data(); + for (size_t i = 0; i < vec.size(); ++i) { + EXPECT_EQ(vec[i], *(ptr + i)); + } + + vec.clear(); + vec.CopyFromCUDA(); + + std::vector v = {1, 2, 3}; + for (size_t i = 0; i < v.size(); ++i) { + EXPECT_EQ(v[i], vec[i]); + } +} + +TEST(LoD, data) { + paddle::framework::InitDevices(); + + paddle::framework::LoD lod{{0, 1, 2}}; + lod.push_back({0, 2, 4, 5}); + lod.push_back(std::vector({0, 1, 6, 8, 10, 11})); + + auto& v = lod[0]; + test<<<1, 1>>>(v.cuda_data(), v.size()); + cudaDeviceSynchronize(); + + v.CopyFromCUDA(); + for (size_t i = 0; i < v.size(); ++i) { + EXPECT_EQ(v[i], i * 2); + } +} + TEST(LoDTensor, LoDInGPU) { + paddle::framework::InitDevices(); + paddle::framework::LoDTensor lod_tensor; paddle::platform::CUDAPlace place(0); @@ -42,8 +85,9 @@ TEST(LoDTensor, LoDInGPU) { auto lod = lod_tensor.lod(); - test<<<1, 8>>>(lod[0].data(), lod[0].size()); + test<<<1, 8>>>(lod[0].cuda_data(), lod[0].size()); cudaDeviceSynchronize(); + lod.CopyFromCUDA(); for (size_t i = 0; i < src_lod[0].size(); ++i) { EXPECT_EQ(lod[0].data()[i], src_lod[0].data()[i] * 2); diff --git a/paddle/framework/mixed_vector.h b/paddle/framework/mixed_vector.h new file mode 100644 index 0000000000000000000000000000000000000000..0e0e23958602343f8e0106e3a88eaac9c6d71066 --- /dev/null +++ b/paddle/framework/mixed_vector.h @@ -0,0 +1,154 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include +#include + +#include "paddle/memory/memcpy.h" +#include "paddle/memory/memory.h" +#include "paddle/platform/device_context.h" +#include "paddle/platform/enforce.h" +#include "paddle/platform/place.h" + +namespace paddle { +namespace framework { + +/** + * @brief Vector support both cpu and gpu. + * host vector lifetime is same with Vector + * device vector is lazily malloc and modified. + */ + +template +class Vector : public std::vector { + public: + /* NOTE(dzhwinter): + * Data always store and modified on Host. + * If the data is modified when use cuda_data interface, + * You need to call the CopyFromCUDA explicitly to synchronize data. + * + */ + enum class kDataPosition { + kDataOnHost = 0, + kDataOnDevice = 1, + }; + + public: + using std::vector::vector; + + Vector() {} + Vector(const std::vector &v) : std::vector(v) {} // NOLINT + + virtual ~Vector() { +#ifdef PADDLE_WITH_CUDA + if (cuda_ptr_ != nullptr) { + memory::Free(place_, static_cast(cuda_ptr_)); + } +#endif + } + + T *cuda_data() { + CopyToCUDA(); + PADDLE_ENFORCE_NOT_NULL( + cuda_ptr_, "No data or Insufficient CUDA memory to allocation"); + return static_cast(cuda_ptr_); + } + + T *data() { return std::vector::data(); } + + const T *data() const { return std::vector::data(); } + + void CopyToCUDA(); + + void CopyFromCUDA(); + + void CopyToPeer(platform::Place); + + private: + void *cuda_ptr_ = nullptr; + size_t cuda_size_ = 0; + /*The DataPosition is unused now, + if we want support random access from cpu and cuda, + we need to overload all the vector method */ + + kDataPosition position_ = kDataPosition::kDataOnHost; + platform::CUDAPlace place_; +}; + +template +void Vector::CopyToCUDA() { +#ifdef PADDLE_WITH_CUDA + if (cuda_ptr_ == nullptr) { + cuda_ptr_ = + memory::Alloc(place_, this->size() * sizeof(T)); + } + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto *cuda_ctx = pool.GetByPlace(place_); + + memory::Copy(place_, static_cast(cuda_ptr_), platform::CPUPlace(), + static_cast(this->data()), + this->size() * sizeof(T), cuda_ctx->stream()); + cuda_ctx->Wait(); + + cuda_size_ = this->size(); +#endif +} + +template +void Vector::CopyFromCUDA() { +#ifdef PADDLE_WITH_CUDA + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto *cuda_ctx = pool.GetByPlace(place_); + if (cuda_ptr_ == nullptr) { + LOG(WARNING) << "No uncommited cuda data."; + return; + } + this->resize(cuda_size_); + memory::Copy(platform::CPUPlace(), static_cast(this->data()), place_, + static_cast(cuda_ptr_), this->size() * sizeof(T), + cuda_ctx->stream()); + cuda_ctx->Wait(); + +#endif +} + +template +void Vector::CopyToPeer(platform::Place peer_place) { + if (platform::is_cpu_place(peer_place)) { + return; + } +#ifdef PADDLE_WITH_CUDA + auto *cuda_ctx = platform::DeviceContextPool::Instance().GetByPlace(place_); + void *peer_cuda_ptr_ = memory::Alloc( + boost::get(peer_place), this->size() * sizeof(T)); + memory::Copy(boost::get(peer_place), + static_cast(peer_cuda_ptr_), place_, + static_cast(cuda_ptr_), this->size() * sizeof(T), + cuda_ctx->stream()); + cuda_ctx->Wait(); + memory::Free(place_, static_cast(cuda_ptr_)); + place_ = boost::get(peer_place); + cuda_ptr_ = peer_cuda_ptr_; +#endif +} + +template class Vector; +template class Vector; +template class Vector; +template class Vector; + +} // namespace framework +} // namespace paddle diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc index 831b1e2a1e10777d9e89364adcd4b1f367e86080..4e854f54dd43d760bab44fb5f7cafeb13314b27c 100644 --- a/paddle/framework/operator.cc +++ b/paddle/framework/operator.cc @@ -22,9 +22,7 @@ limitations under the License. */ #include "paddle/framework/shape_inference.h" #include "paddle/framework/var_type.h" -DEFINE_bool(op_sync, false, - "Default cuda is asynchronous device, set to True will" - "force op run in synchronous mode."); +DECLARE_bool(benchmark); namespace paddle { namespace framework { @@ -531,7 +529,7 @@ void OperatorWithKernel::Run(const Scope& scope, ExecutionContext(*this, new_scope, *new_dev_ctx)); /*For profiling/benchmark only*/ - if (FLAGS_op_sync) { + if (FLAGS_benchmark) { new_dev_ctx->Wait(); } } diff --git a/paddle/framework/program_desc.cc b/paddle/framework/program_desc.cc index b5d9e5e385c1ba57169ef885824fc23b0f130692..15ea4035c6e6193105b621210a900e74d1466941 100644 --- a/paddle/framework/program_desc.cc +++ b/paddle/framework/program_desc.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/framework/program_desc.h" #include "paddle/framework/block_desc.h" +#include "paddle/framework/feed_fetch_type.h" namespace paddle { namespace framework { @@ -64,5 +65,27 @@ ProgramDesc::ProgramDesc(const std::string &binary_str) { } } +const std::vector ProgramDesc::GetFeedTargetNames() { + BlockDesc *global_block = blocks_[0].get(); + std::vector feed_target_names; + for (auto *op : global_block->AllOps()) { + if (op->Type() == kFeedOpType) { + feed_target_names.insert(feed_target_names.begin(), op->Output("Out")[0]); + } + } + return feed_target_names; +} + +const std::vector ProgramDesc::GetFetchTargetNames() { + BlockDesc *global_block = blocks_[0].get(); + std::vector fetch_target_names; + for (auto *op : global_block->AllOps()) { + if (op->Type() == kFetchOpType) { + fetch_target_names.push_back(op->Input("X")[0]); + } + } + return fetch_target_names; +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/program_desc.h b/paddle/framework/program_desc.h index 15a962bb696d6172acd1a83cf9bb1ffd0846d449..8e958eab6ee08436ca73b13bac010e66c7df2b8b 100644 --- a/paddle/framework/program_desc.h +++ b/paddle/framework/program_desc.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include +#include "paddle/framework/block_desc.h" #include "paddle/framework/framework.pb.h" #include "paddle/framework/proto_desc.h" #include "paddle/platform/macros.h" @@ -45,6 +46,9 @@ class ProgramDesc { proto::ProgramDesc *Proto(); + const std::vector GetFeedTargetNames(); + const std::vector GetFetchTargetNames(); + private: proto::ProgramDesc desc_; diff --git a/paddle/framework/prune.cc b/paddle/framework/prune.cc index 25eb813ffb96e9b1e13299421ead9f85c02da59f..bff8e0bceaca9749101b2c45edddba526d565624 100644 --- a/paddle/framework/prune.cc +++ b/paddle/framework/prune.cc @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include +#include #include #include @@ -102,6 +103,32 @@ void prune_impl(const proto::ProgramDesc& input, proto::ProgramDesc* output, *op_field->Add() = input.blocks(block_id).ops(i); } } + + // remove the VarDescs in BlockDesc that are not referenced in + // the pruned OpDescs + std::unordered_map var_map; + auto* var_field = output->mutable_blocks(block_id)->mutable_vars(); + for (const auto& var : *var_field) { + var_map[var.name()] = var; + } + + var_field->Clear(); + for (const auto& op : *op_field) { + // add VarDescs of all input arguments for each OpDesc + auto& input_field = op.inputs(); + for (auto& input_var : input_field) { + for (auto& arg : input_var.arguments()) { + *var_field->Add() = var_map[arg]; + } + } + // add VarDescs of all output arguments for each OpDesc + auto& output_field = op.outputs(); + for (auto& output_var : output_field) { + for (auto& arg : output_var.arguments()) { + *var_field->Add() = var_map[arg]; + } + } + } } // TODO(fengjiayi): Prune() could be inplaced to avoid unnecessary copies diff --git a/paddle/framework/scope.cc b/paddle/framework/scope.cc index a67ff910093d93060d07d849f6e968e5f4ce21cd..af08b2ab816f63c05d4c65df9601c787e57994f5 100644 --- a/paddle/framework/scope.cc +++ b/paddle/framework/scope.cc @@ -20,9 +20,11 @@ limitations under the License. */ #include "paddle/framework/threadpool.h" #include "paddle/string/printf.h" -DEFINE_bool(do_memory_benchmark, false, +DEFINE_bool(benchmark, false, "Doing memory benchmark. It will make deleting scope synchronized, " - "and add some memory usage logs"); + "and add some memory usage logs." + "Default cuda is asynchronous device, set to True will" + "force op run in synchronous mode."); namespace paddle { namespace framework { @@ -93,7 +95,7 @@ void Scope::DeleteScope(Scope* scope) { PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope); this->kids_.erase(it); // When making memory benchmark on Fluid, we have to delete scope sync. - if (FLAGS_do_memory_benchmark) { + if (FLAGS_benchmark) { delete scope; } else { Async([scope] { delete scope; }); diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index 4aaa29d794c95592832a1fe990e2dce274eba9d5..f0ea709a5c37e769e3ffa1b2e9d1e39721979251 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -47,6 +47,11 @@ class Tensor { public: Tensor() : offset_(0) {} + /*! Constructor with place should only be used in pybind. */ + explicit Tensor(const platform::Place& place) : offset_(0) { + holder_->set_place(place); + } + /*! Return a pointer to mutable memory block. */ template inline T* data(); @@ -137,6 +142,7 @@ class Tensor { virtual std::type_index type() const = 0; virtual platform::Place place() const = 0; virtual void set_type(std::type_index type) = 0; + virtual void set_place(platform::Place place) = 0; }; template @@ -156,6 +162,7 @@ class Tensor { virtual void* ptr() const { return static_cast(ptr_.get()); } virtual std::type_index type() const { return type_; } virtual void set_type(std::type_index type) { type_ = type; } + virtual void set_place(platform::Place place) { place_ = place; } /*! the pointer of memory block. */ std::unique_ptr> ptr_; diff --git a/paddle/framework/threadpool.cc b/paddle/framework/threadpool.cc index b2f5ae4a96593fde1623dd10d3b63c984ae228db..b7d7c00bcf9d9770f58284023ca2defcda299d64 100644 --- a/paddle/framework/threadpool.cc +++ b/paddle/framework/threadpool.cc @@ -14,6 +14,8 @@ #include "paddle/framework/threadpool.h" +#include "paddle/platform/enforce.h" + namespace paddle { namespace framework { diff --git a/paddle/framework/threadpool.h b/paddle/framework/threadpool.h index 8912b1a43a26f9df662d3b5ddf68bfb2b87f4a20..4e9b58679d9e7c84adf76b6245b397c7a8872483 100644 --- a/paddle/framework/threadpool.h +++ b/paddle/framework/threadpool.h @@ -22,7 +22,7 @@ limitations under the License. */ #include #include -#include "paddle/platform/enforce.h" +#include "paddle/platform/macros.h" // for DISABLE_COPY_AND_ASSIGN namespace paddle { namespace framework { diff --git a/paddle/inference/CMakeLists.txt b/paddle/inference/CMakeLists.txt index ae4d3fd2f58daf87a650428e04722581610ed780..2289ddc139cbddfbaa5238e683b2f8e784a7291e 100644 --- a/paddle/inference/CMakeLists.txt +++ b/paddle/inference/CMakeLists.txt @@ -1,14 +1,14 @@ -set(FLUID_CORE_MODULES proto_desc paddle_memory executor prune init) +set(FLUID_CORE_MODULES proto_desc paddle_memory lod_tensor executor prune init) cc_library(paddle_fluid_api - SRCS inference.cc + SRCS io.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB}) # Merge all modules into a single static library cc_library(paddle_fluid DEPS paddle_fluid_api ${FLUID_CORE_MODULES} ${GLOB_OP_LIB}) # Create shared library -add_library(paddle_fluid_shared SHARED inference.cc) +add_library(paddle_fluid_shared SHARED io.cc) target_circle_link_libraries(paddle_fluid_shared ARCHIVE_START @@ -20,23 +20,10 @@ SET_TARGET_PROPERTIES(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid) # install library & headers if(NOT WITH_C_API AND WITH_FLUID) - install(FILES inference.h DESTINATION include/paddle/inference) + install(FILES io.h DESTINATION include/paddle/inference) install(TARGETS paddle_fluid_shared DESTINATION lib) endif() -add_executable(example example.cc) -if(APPLE) - set(OPTIONAL_LINK_FLAGS) - if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang") - set(OPTIONAL_LINK_FLAGS "-undefined dynamic_lookup") - endif() - target_link_libraries(example - -Wl,-force_load paddle_fluid - ${OPTIONAL_LINK_FLAGS} - ${PTOOLS_LIB}) -else() - target_link_libraries(example - -Wl,--start-group -Wl,--whole-archive paddle_fluid - -Wl,--no-whole-archive -Wl,--end-group - ${PTOOLS_LIB}) +if(WITH_TESTING) + add_subdirectory(tests/book) endif() diff --git a/paddle/inference/example.cc b/paddle/inference/example.cc deleted file mode 100644 index 0c18b45624dedcb5839d4b771e044b4a7b32af52..0000000000000000000000000000000000000000 --- a/paddle/inference/example.cc +++ /dev/null @@ -1,67 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include "gflags/gflags.h" -#include "paddle/inference/inference.h" - -DEFINE_string(dirname, "", "Directory of the inference model."); - -int main(int argc, char** argv) { - google::ParseCommandLineFlags(&argc, &argv, true); - if (FLAGS_dirname.empty()) { - // Example: - // ./example --dirname=recognize_digits_mlp.inference.model - std::cout << "Usage: ./example --dirname=path/to/your/model" << std::endl; - exit(1); - } - - std::cout << "FLAGS_dirname: " << FLAGS_dirname << std::endl; - std::string dirname = FLAGS_dirname; - - paddle::InferenceEngine* engine = new paddle::InferenceEngine(); - engine->LoadInferenceModel(dirname); - - paddle::framework::LoDTensor input; - srand(time(0)); - float* input_ptr = - input.mutable_data({1, 784}, paddle::platform::CPUPlace()); - for (int i = 0; i < 784; ++i) { - input_ptr[i] = rand() / (static_cast(RAND_MAX)); - } - - std::vector feeds; - feeds.push_back(input); - std::vector fetchs; - engine->Execute(feeds, fetchs); - - for (size_t i = 0; i < fetchs.size(); ++i) { - auto dims_i = fetchs[i].dims(); - std::cout << "dims_i:"; - for (int j = 0; j < dims_i.size(); ++j) { - std::cout << " " << dims_i[j]; - } - std::cout << std::endl; - std::cout << "result:"; - float* output_ptr = fetchs[i].data(); - for (int j = 0; j < paddle::framework::product(dims_i); ++j) { - std::cout << " " << output_ptr[j]; - } - std::cout << std::endl; - } - - delete engine; - return 0; -} diff --git a/paddle/inference/inference.cc b/paddle/inference/inference.cc deleted file mode 100644 index b43c359ed1787143403336e8c1cb4c7f85b1d7a2..0000000000000000000000000000000000000000 --- a/paddle/inference/inference.cc +++ /dev/null @@ -1,187 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "inference.h" -#include -#include "paddle/framework/executor.h" -#include "paddle/framework/init.h" -#include "paddle/framework/scope.h" - -namespace paddle { - -void InferenceEngine::LoadInferenceModel(const std::string& dirname) { - std::string model_filename = dirname + "/__model__"; - LOG(INFO) << "loading model from " << model_filename; - std::ifstream inputfs(model_filename, std::ios::in | std::ios::binary); - std::string program_desc_str; - inputfs.seekg(0, std::ios::end); - program_desc_str.resize(inputfs.tellg()); - inputfs.seekg(0, std::ios::beg); - LOG(INFO) << "program_desc_str's size: " << program_desc_str.size(); - inputfs.read(&program_desc_str[0], program_desc_str.size()); - inputfs.close(); - - program_ = new framework::ProgramDesc(program_desc_str); - GenerateLoadProgram(dirname); - - framework::BlockDesc* global_block = program_->MutableBlock(0); - feed_var_names_.clear(); - fetch_var_names_.clear(); - for (auto* op : global_block->AllOps()) { - if (op->Type() == "feed") { - feed_var_names_.insert(feed_var_names_.begin(), op->Output("Out")[0]); - } else if (op->Type() == "fetch") { - fetch_var_names_.push_back(op->Input("X")[0]); - } - } -} - -bool InferenceEngine::IsParameter(const framework::VarDesc* var) { - if (var->Persistable()) { - // There are many unreachable variables in the program - for (size_t i = 0; i < program_->Size(); ++i) { - const framework::BlockDesc& block = program_->Block(i); - for (auto* op : block.AllOps()) { - if (op->Type() == "feed") { - continue; - } - for (auto input_argument_name : op->InputArgumentNames()) { - if (input_argument_name == var->Name()) { - return true; - } - } - } - } - } - return false; -} - -void InferenceEngine::GenerateLoadProgram(const std::string& dirname) { - framework::BlockDesc* global_block = program_->MutableBlock(0); - - load_program_ = new framework::ProgramDesc(); - framework::BlockDesc* load_block = load_program_->MutableBlock(0); - for (auto* var : global_block->AllVars()) { - if (IsParameter(var)) { - LOG(INFO) << "parameter's name: " << var->Name(); - - framework::VarDesc* new_var = load_block->Var(var->Name()); - new_var->SetShape(var->Shape()); - new_var->SetDataType(var->GetDataType()); - new_var->SetType(var->GetType()); - new_var->SetLoDLevel(var->GetLoDLevel()); - new_var->SetPersistable(true); - - // append_op - framework::OpDesc* op = load_block->AppendOp(); - op->SetType("load"); - op->SetOutput("Out", {new_var->Name()}); - op->SetAttr("file_path", {dirname + "/" + new_var->Name()}); - op->CheckAttrs(); - } - } -} - -void InferenceEngine::PrependFeedOp() { - if (!program_) { - LOG(FATAL) << "Please initialize the program_ first."; - } - - framework::BlockDesc* global_block = program_->MutableBlock(0); - - // create_var - framework::VarDesc* feed_var = global_block->Var("feed"); - feed_var->SetType(framework::proto::VarDesc::FEED_MINIBATCH); - feed_var->SetPersistable(true); - - // prepend feed_op - for (size_t i = 0; i < feed_var_names_.size(); ++i) { - std::string var_name = feed_var_names_[i]; - LOG(INFO) << "feed var's name: " << var_name; - - // prepend_op - framework::OpDesc* op = global_block->PrependOp(); - op->SetType("feed"); - op->SetInput("X", {"feed"}); - op->SetOutput("Out", {var_name}); - op->SetAttr("col", {static_cast(i)}); - op->CheckAttrs(); - } -} - -void InferenceEngine::AppendFetchOp() { - if (!program_) { - LOG(FATAL) << "Please initialize the program_ first."; - } - - framework::BlockDesc* global_block = program_->MutableBlock(0); - - // create_var - framework::VarDesc* fetch_var = global_block->Var("fetch"); - fetch_var->SetType(framework::proto::VarDesc::FETCH_LIST); - fetch_var->SetPersistable(true); - - // append fetch_op - for (size_t i = 0; i < fetch_var_names_.size(); ++i) { - std::string var_name = fetch_var_names_[i]; - LOG(INFO) << "fetch var's name: " << var_name; - - // append_op - framework::OpDesc* op = global_block->AppendOp(); - op->SetType("fetch"); - op->SetInput("X", {var_name}); - op->SetOutput("Out", {"fetch"}); - op->SetAttr("col", {static_cast(i)}); - op->CheckAttrs(); - } -} - -void InferenceEngine::Execute(const std::vector& feeds, - std::vector& fetchs) { - if (!program_ || !load_program_) { - LOG(FATAL) << "Please initialize the program_ and load_program_ first."; - } - - if (feeds.size() != feed_var_names_.size()) { - LOG(FATAL) << "Please feed " << feed_var_names_.size() << " input Tensors."; - } - - auto* place = new platform::CPUPlace(); - framework::InitDevices(); - framework::Executor* executor = new framework::Executor(*place); - framework::Scope* scope = new framework::Scope(); - - executor->Run(*load_program_, scope, 0, true, true); - - std::map feed_targets; - std::map fetch_targets; - - // set_feed_variable - for (size_t i = 0; i < feed_var_names_.size(); ++i) { - feed_targets[feed_var_names_[i]] = &feeds[i]; - } - - // get_fetch_variable - fetchs.resize(fetch_var_names_.size()); - for (size_t i = 0; i < fetch_var_names_.size(); ++i) { - fetch_targets[fetch_var_names_[i]] = &fetchs[i]; - } - - executor->Run(*program_, scope, feed_targets, fetch_targets); - - delete place; - delete scope; - delete executor; -} -} // namespace paddle diff --git a/paddle/inference/inference.h b/paddle/inference/inference.h deleted file mode 100644 index 26f259824b945e260b370ced9d065842264075d5..0000000000000000000000000000000000000000 --- a/paddle/inference/inference.h +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/framework/block_desc.h" -#include "paddle/framework/lod_tensor.h" -#include "paddle/framework/program_desc.h" - -namespace paddle { - -class InferenceEngine { -public: - InferenceEngine() : program_(nullptr), load_program_(nullptr) {} - ~InferenceEngine() { - delete program_; - delete load_program_; - } - - void LoadInferenceModel(const std::string& dirname); - void Execute(const std::vector& feeds, - std::vector& fetchs); - -private: - bool IsParameter(const framework::VarDesc* var); - void GenerateLoadProgram(const std::string& dirname); - void PrependFeedOp(); - void AppendFetchOp(); - -private: - framework::ProgramDesc* program_; - framework::ProgramDesc* load_program_; - std::vector feed_var_names_; - std::vector fetch_var_names_; -}; - -} // namespace paddle diff --git a/paddle/inference/io.cc b/paddle/inference/io.cc new file mode 100644 index 0000000000000000000000000000000000000000..60ad7af1c0a469beb6a07bf057a8647fcb98cca8 --- /dev/null +++ b/paddle/inference/io.cc @@ -0,0 +1,98 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/inference/io.h" + +#include +#include "paddle/framework/block_desc.h" +#include "paddle/framework/feed_fetch_type.h" + +namespace paddle { +namespace inference { + +bool IsParameter(const framework::VarDesc* var, + const framework::ProgramDesc& main_program) { + if (var->Persistable()) { + // There are many unreachable variables in the program + for (size_t i = 0; i < main_program.Size(); ++i) { + const framework::BlockDesc& block = main_program.Block(i); + for (auto* op : block.AllOps()) { + if (op->Type() == framework::kFeedOpType) { + continue; + } + for (auto input_argument_name : op->InputArgumentNames()) { + if (input_argument_name == var->Name()) { + return true; + } + } + } + } + } + return false; +} + +void LoadPersistables(framework::Executor& executor, + framework::Scope& scope, + const std::string& dirname, + const framework::ProgramDesc& main_program) { + const framework::BlockDesc& global_block = main_program.Block(0); + + framework::ProgramDesc* load_program = new framework::ProgramDesc(); + framework::BlockDesc* load_block = load_program->MutableBlock(0); + for (auto* var : global_block.AllVars()) { + if (IsParameter(var, main_program)) { + VLOG(3) << "parameter's name: " << var->Name(); + + framework::VarDesc* new_var = load_block->Var(var->Name()); + new_var->SetShape(var->Shape()); + new_var->SetDataType(var->GetDataType()); + new_var->SetType(var->GetType()); + new_var->SetLoDLevel(var->GetLoDLevel()); + new_var->SetPersistable(true); + + // append_op + framework::OpDesc* op = load_block->AppendOp(); + op->SetType("load"); + op->SetOutput("Out", {new_var->Name()}); + op->SetAttr("file_path", {dirname + "/" + new_var->Name()}); + op->CheckAttrs(); + } + } + executor.Run(*load_program, &scope, 0, true, true); + delete load_program; +} + +std::unique_ptr Load(framework::Executor& executor, + framework::Scope& scope, + const std::string& dirname) { + std::string model_filename = dirname + "/__model__"; + LOG(INFO) << "loading model from " << model_filename; + std::ifstream inputfs(model_filename, std::ios::in | std::ios::binary); + std::string program_desc_str; + inputfs.seekg(0, std::ios::end); + program_desc_str.resize(inputfs.tellg()); + inputfs.seekg(0, std::ios::beg); + LOG(INFO) << "program_desc_str's size: " << program_desc_str.size(); + inputfs.read(&program_desc_str[0], program_desc_str.size()); + inputfs.close(); + + std::unique_ptr main_program( + new framework::ProgramDesc(program_desc_str)); + + LoadPersistables(executor, scope, dirname, *main_program); + return main_program; +} + +} // namespace inference +} // namespace paddle diff --git a/paddle/inference/io.h b/paddle/inference/io.h new file mode 100644 index 0000000000000000000000000000000000000000..962b6c4e20d30de3cc28eae1c8c5c33b3ab5f6ac --- /dev/null +++ b/paddle/inference/io.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include "paddle/framework/executor.h" +#include "paddle/framework/program_desc.h" +#include "paddle/framework/scope.h" + +namespace paddle { +namespace inference { + +void LoadPersistables(framework::Executor& executor, + framework::Scope& scope, + const std::string& dirname, + const framework::ProgramDesc& main_program); + +std::unique_ptr Load(framework::Executor& executor, + framework::Scope& scope, + const std::string& dirname); + +} // namespace inference +} // namespace paddle diff --git a/paddle/inference/tests/book/CMakeLists.txt b/paddle/inference/tests/book/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..d3798fb8fd8769aef5940d4ce724cb0cc8686422 --- /dev/null +++ b/paddle/inference/tests/book/CMakeLists.txt @@ -0,0 +1,7 @@ +set(PYTHON_TESTS_DIR ${PADDLE_SOURCE_DIR}/python/paddle/v2/fluid/tests) +cc_test(test_inference_recognize_digits_mlp + SRCS test_inference_recognize_digits.cc + DEPS ARCHIVE_START paddle_fluid ARCHIVE_END + ARGS --dirname=${PYTHON_TESTS_DIR}/book/recognize_digits_mlp.inference.model) +set_tests_properties(test_inference_recognize_digits_mlp + PROPERTIES DEPENDS test_recognize_digits_mlp_cpu) diff --git a/paddle/inference/tests/book/test_inference_recognize_digits.cc b/paddle/inference/tests/book/test_inference_recognize_digits.cc new file mode 100644 index 0000000000000000000000000000000000000000..26dc2aee04261d9a1fd29b4d75bfacc7870c09d8 --- /dev/null +++ b/paddle/inference/tests/book/test_inference_recognize_digits.cc @@ -0,0 +1,113 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include +#include "gflags/gflags.h" +#include "paddle/framework/lod_tensor.h" +#include "paddle/inference/io.h" + +DEFINE_string(dirname, "", "Directory of the inference model."); + +template +void TestInference(const std::string& dirname, + const std::vector& cpu_feeds, + std::vector& cpu_fetchs) { + // 1. Define place, executor and scope + auto place = Place(); + auto executor = paddle::framework::Executor(place); + auto* scope = new paddle::framework::Scope(); + + // 2. Initialize the inference_program and load all parameters from file + auto inference_program = paddle::inference::Load(executor, *scope, dirname); + + // 3. Get the feed_target_names and fetch_target_names + const std::vector& feed_target_names = + inference_program->GetFeedTargetNames(); + const std::vector& fetch_target_names = + inference_program->GetFetchTargetNames(); + + // 4. Prepare inputs: set up maps for feed targets + std::map feed_targets; + for (size_t i = 0; i < feed_target_names.size(); ++i) { + // Please make sure that cpu_feeds[i] is right for feed_target_names[i] + feed_targets[feed_target_names[i]] = cpu_feeds[i]; + } + + // 5. Define Tensor to get the outputs: set up maps for fetch targets + std::map fetch_targets; + for (size_t i = 0; i < fetch_target_names.size(); ++i) { + fetch_targets[fetch_target_names[i]] = cpu_fetchs[i]; + } + + // 6. Run the inference program + executor.Run(*inference_program, scope, feed_targets, fetch_targets); + + delete scope; +} + +TEST(inference, recognize_digits) { + if (FLAGS_dirname.empty()) { + LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model"; + } + + LOG(INFO) << "FLAGS_dirname: " << FLAGS_dirname << std::endl; + std::string dirname = FLAGS_dirname; + + // 0. Call `paddle::framework::InitDevices()` initialize all the devices + // In unittests, this is done in paddle/testing/paddle_gtest_main.cc + + paddle::framework::LoDTensor input; + srand(time(0)); + float* input_ptr = + input.mutable_data({1, 28, 28}, paddle::platform::CPUPlace()); + for (int i = 0; i < 784; ++i) { + input_ptr[i] = rand() / (static_cast(RAND_MAX)); + } + std::vector cpu_feeds; + cpu_feeds.push_back(&input); + + paddle::framework::LoDTensor output1; + std::vector cpu_fetchs1; + cpu_fetchs1.push_back(&output1); + + // Run inference on CPU + TestInference( + dirname, cpu_feeds, cpu_fetchs1); + LOG(INFO) << output1.dims(); + +#ifdef PADDLE_WITH_CUDA + paddle::framework::LoDTensor output2; + std::vector cpu_fetchs2; + cpu_fetchs2.push_back(&output2); + + // Run inference on CUDA GPU + TestInference( + dirname, cpu_feeds, cpu_fetchs2); + LOG(INFO) << output2.dims(); + + EXPECT_EQ(output1.dims(), output2.dims()); + EXPECT_EQ(output1.numel(), output2.numel()); + + float err = 1E-3; + int count = 0; + for (int64_t i = 0; i < output1.numel(); ++i) { + if (fabs(output1.data()[i] - output2.data()[i]) > err) { + count++; + } + } + EXPECT_EQ(count, 0) << "There are " << count << " different elements."; +#endif +} diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 50b441e75d05d985d2ab2e0c46218eacab45e2ea..e903f43ba69ee9e28b3a03e8921a41ffa81a2542 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -175,6 +175,8 @@ endif() # FIXME(typhoonzero): save/load depends lodtensor serialization functions op_library(save_op DEPS lod_tensor) op_library(load_op DEPS lod_tensor) +op_library(save_combine_op DEPS lod_tensor) +op_library(load_combine_op DEPS lod_tensor) list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) foreach(src ${GENERAL_OPS}) @@ -194,3 +196,4 @@ if(WITH_GPU) cc_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context) endif() cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op) +cc_test(save_load_combine_op_test SRCS save_load_combine_op_test.cc DEPS save_combine_op load_combine_op) diff --git a/paddle/operators/adagrad_op.cu b/paddle/operators/adagrad_op.cu index 4e579387924a5b0499f29609bc6b1322030a3c0d..00cb6e9cafb4e79ed3d59cd4a6e40ea132e5efda 100644 --- a/paddle/operators/adagrad_op.cu +++ b/paddle/operators/adagrad_op.cu @@ -82,7 +82,7 @@ struct SparseAdagradFunctor { math::scatter::MergeAdd merge_func; auto grad_merge = merge_func(context, grad); auto* grad_merge_data = grad_merge.mutable_value()->template data(); - auto& merge_rows = grad_merge.rows(); + framework::Vector merge_rows(grad_merge.rows()); // 2. m += g_m * g_m math::scatter::Mul sqare_func; auto grad_square = sqare_func(context, grad_merge, grad_merge); @@ -101,8 +101,8 @@ struct SparseAdagradFunctor { SparseAdagradFunctorKernel< T, 256><<(context) - .stream()>>>(grad_merge_data, grad_merge.rows().data(), - lr, param_data, moment_data, grad_width, + .stream()>>>(grad_merge_data, merge_rows.cuda_data(), lr, + param_data, moment_data, grad_width, epsilon); } }; diff --git a/paddle/operators/adam_op.h b/paddle/operators/adam_op.h index 9cc34bdded780e61e8700eb4fa4a295c84fb48bc..bf536687d398b8342e6ae76a07c11e5fe47483e0 100644 --- a/paddle/operators/adam_op.h +++ b/paddle/operators/adam_op.h @@ -199,7 +199,12 @@ class AdamOpKernel : public framework::OpKernel { merge_func(ctx.template device_context(), grad); auto& grad_tensor = grad_merge.value(); const T* grad_data = grad_tensor.template data(); - auto* rows = grad_merge.rows().data(); + int64_t* rows = nullptr; + if (platform::is_gpu_place(ctx.GetPlace())) { + rows = grad_merge.mutable_rows()->cuda_data(); + } else { + rows = grad_merge.mutable_rows()->data(); + } auto row_numel = grad_tensor.numel() / grad_merge.rows().size(); SparseAdamFunctor functor( diff --git a/paddle/operators/ctc_align_op.cu b/paddle/operators/ctc_align_op.cu index 45635f16745346b08f7e31db2f25905bdbc3aeeb..2a970cd9fa965b4126356eaa1519068f9c7a7f34 100644 --- a/paddle/operators/ctc_align_op.cu +++ b/paddle/operators/ctc_align_op.cu @@ -69,12 +69,11 @@ class CTCAlignOpCUDAKernel : public framework::OpKernel { auto stream = ctx.cuda_device_context().stream(); MergeAndDelCudaKernel<<<1, 1, 0, stream>>>( - num_tokens, tokens, num_seq, input_lod[level].data(), blank, + num_tokens, tokens, num_seq, input_lod[level].cuda_data(), blank, merge_repeated, dev_out_lod0_ptr, output_data); // set output lod - thrust::host_vector host_out_lod0(dev_out_lod0.begin(), - dev_out_lod0.end()); + std::vector host_out_lod0(dev_out_lod0.begin(), dev_out_lod0.end()); framework::LoD out_lod; out_lod.push_back(host_out_lod0); output->set_lod(out_lod); diff --git a/paddle/operators/dropout_op.cc b/paddle/operators/dropout_op.cc index 35cb18797ff66cb87a6658e73ce02b0bfae29baa..5274aa204e6629c9c5ea850c433e0948c89015bd 100644 --- a/paddle/operators/dropout_op.cc +++ b/paddle/operators/dropout_op.cc @@ -51,6 +51,13 @@ class DropoutOpMaker : public framework::OpProtoAndCheckerMaker { "'dropout_prob' must be between 0.0 and 1.0."); }); AddAttr("is_test", "True if in test phase.").SetDefault(false); + AddAttr("fix_seed", + "A flag indicating whether to use a fixed seed to generate " + "random mask. NOTE: DO NOT set this flag to true in " + "training. Setting this flag to true is only useful in " + "unittest or for debug that always the same output units " + "will be dropped.") + .SetDefault(false); AddAttr("seed", "Dropout random seed.").SetDefault(0); AddComment(R"DOC( diff --git a/paddle/operators/dropout_op.cu b/paddle/operators/dropout_op.cu index c56930336e865079f1b96df0f35b0a051fe63a27..84d78445a4fa340ba3c066bb48b96b2a890db652 100644 --- a/paddle/operators/dropout_op.cu +++ b/paddle/operators/dropout_op.cu @@ -62,7 +62,11 @@ class GPUDropoutKernel : public framework::OpKernel { auto* mask = context.Output("Mask"); auto* mask_data = mask->mutable_data(context.GetPlace()); int size = framework::product(mask->dims()); - int seed = context.Attr("seed"); + + std::random_device rnd; + int seed = + context.Attr("fix_seed") ? context.Attr("seed") : rnd(); + thrust::counting_iterator index_sequence_begin(0); thrust::transform(index_sequence_begin, index_sequence_begin + size, thrust::device_ptr(mask_data), diff --git a/paddle/operators/dropout_op.h b/paddle/operators/dropout_op.h index c90b8d277eb78048c001d36a367287146b51c636..46e5dbc64ff9ad3d04a9c1c07f4226932f661baf 100644 --- a/paddle/operators/dropout_op.h +++ b/paddle/operators/dropout_op.h @@ -38,9 +38,15 @@ class CPUDropoutKernel : public framework::OpKernel { if (!context.Attr("is_test")) { auto* mask = context.Output("Mask"); auto* mask_data = mask->mutable_data(context.GetPlace()); - int seed = context.Attr("seed"); + + // NOTE: fixed seed should only be used in unittest or for debug. + // Guarantee to use random seed in training. + std::random_device rnd; std::minstd_rand engine; + int seed = + context.Attr("fix_seed") ? context.Attr("seed") : rnd(); engine.seed(seed); + std::uniform_real_distribution dist(0, 1); size_t size = framework::product(mask->dims()); for (size_t i = 0; i < size; ++i) { diff --git a/paddle/operators/elementwise_pow_op.cc b/paddle/operators/elementwise_pow_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..5293cc7dd34ccee860c50e964516da9b4d42d29c --- /dev/null +++ b/paddle/operators/elementwise_pow_op.cc @@ -0,0 +1,37 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/elementwise_pow_op.h" +#include "paddle/operators/elementwise_op.h" + +namespace paddle { +namespace operators { +class ElementwisePowOpMaker : public ElementwiseOpMaker { + public: + ElementwisePowOpMaker(OpProto* proto, OpAttrChecker* op_checker) + : ElementwiseOpMaker(proto, op_checker) { + SetComment("Pow", "Out = X ^ Y"); + AddComment(comment_); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_WITHOUT_GRADIENT(elementwise_pow, ops::ElementwiseOp, + ops::ElementwisePowOpMaker); +REGISTER_OP_CPU_KERNEL( + elementwise_pow, + ops::ElementwisePowKernel, + ops::ElementwisePowKernel); diff --git a/paddle/operators/elementwise_pow_op.cu b/paddle/operators/elementwise_pow_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..643c978e635bc8e9671b47774c2eac5b713f59c2 --- /dev/null +++ b/paddle/operators/elementwise_pow_op.cu @@ -0,0 +1,20 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#define EIGEN_USE_GPU +#include "paddle/operators/elementwise_pow_op.h" + +namespace ops = paddle::operators; + +REGISTER_OP_CUDA_KERNEL( + elementwise_pow, + ops::ElementwisePowKernel, + ops::ElementwisePowKernel); diff --git a/paddle/operators/elementwise_pow_op.h b/paddle/operators/elementwise_pow_op.h new file mode 100644 index 0000000000000000000000000000000000000000..6019e709e0db0fd62b4d3350bb768095f87ef241 --- /dev/null +++ b/paddle/operators/elementwise_pow_op.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "paddle/operators/elementwise_op_function.h" + +namespace paddle { +namespace operators { + +template +struct PowFunctor { + inline HOSTDEVICE T operator()(T a, T b) const { return std::pow(a, b); } +}; + +template +class ElementwisePowKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + ElementwiseComputeEx, DeviceContext, T>(ctx); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/gru_op.h b/paddle/operators/gru_op.h index b1957fb9ce6add8628cb206abf2c569d3f615c85..a08bd4233b02d021aaa64bafe4b855f11a60d338 100644 --- a/paddle/operators/gru_op.h +++ b/paddle/operators/gru_op.h @@ -30,11 +30,12 @@ using Tensor = framework::Tensor; template inline void ReorderInitState(const DeviceContext& ctx, - const framework::Tensor& src, const size_t* index, + const framework::Tensor& src, + framework::Vector index_lod, framework::Tensor* dst, bool indexed_src) { math::CopyMatrixRowsFunctor row_shuffle; dst->mutable_data(src.dims(), ctx.GetPlace()); - row_shuffle(ctx, src, index, *dst, indexed_src); + row_shuffle(ctx, src, index_lod, *dst, indexed_src); } template @@ -76,7 +77,9 @@ class GRUKernel : public framework::OpKernel { gru_value.state_weight = const_cast(weight_data + 2 * frame_size * frame_size); Tensor ordered_h0; - const size_t* order = batch_gate->lod()[2].data(); + + framework::Vector order(batch_gate->lod()[2]); + if (h0) { // Since the batch computing for GRU reorders the input sequences // according to their length. The initialized cell state also needs @@ -159,7 +162,9 @@ class GRUGradKernel : public framework::OpKernel { zero(dev_ctx, &batch_reset_hidden_prev_grad, static_cast(0.0)); Tensor ordered_h0, ordered_h0_grad; - const size_t* order = batch_gate->lod()[2].data(); + + framework::Vector order(batch_gate->lod()[2]); + if (h0) { ReorderInitState(dev_ctx, *h0, order, &ordered_h0, true); diff --git a/paddle/operators/label_smooth_op.cc b/paddle/operators/label_smooth_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..c89082f44b360cbd171eccb212674040b8688a46 --- /dev/null +++ b/paddle/operators/label_smooth_op.cc @@ -0,0 +1,128 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/label_smooth_op.h" + +namespace paddle { +namespace operators { + +class LabelSmoothOp : public framework::OperatorWithKernel { + public: + LabelSmoothOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of LabelSmoothOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of LabelSmoothOp should not be null."); + auto in_dims = ctx->GetInputDim("X"); + if (ctx->HasInput("PriorDist")) { + auto noise_dims = ctx->GetInputDim("PriorDist"); + auto noise_numel = paddle::framework::product(noise_dims); + PADDLE_ENFORCE( + in_dims[1] == noise_numel, + "The number of elements in Input(PriorDist) must be equal to the " + "dimension of each label."); + } + ctx->ShareLoD("X", /*->*/ "Out"); + ctx->SetOutputDim("Out", in_dims); + } +}; + +class LabelSmoothOpMaker : public framework::OpProtoAndCheckerMaker { + public: + LabelSmoothOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", + "(LoDTensor) The input labels of LabelSmooth operator. This " + "input can be batched labels in one-hot encoding or output from " + "softmax, with shape [N x K], where N is the batch size and K is " + "the number of classes"); + AddInput("PriorDist", + "(Tensor, optional)" + "The prior distribution to be added to the smoothed label. It is " + "fixed during training and the number of elements should be equal " + "to the dimension K of each label. Default is uniform " + "distribution and each element will be set to 1/K if not provided " + "in input.") + .AsDispensable(); + AddOutput("Out", + "(loDTensor) The smoothed label of LabelSmooth operator. It has" + "the same shape and LoD with the Input(LoDTensor)."); + AddAttr("epsilon", + "(float, default 0.0f)" + "The smoothing parameter of LabelSmooth operator.") + .SetDefault(0.0f); + AddComment(R"DOC( +LabelSmooth Operator. + +Label smoothing is a mechanism to regularize the classifier layer. In machine +learning, optimizing the log-likelihood of the correct label directly may +cause two problems. First, it may result in overfitting: if the model learns +to assign full probability to the ground-truth label for each training example, +it is not guaranteed to generalize. Second, it encourages the differences +between the largest logit and all others to become large, reducing the ability +of the model to adapt. Label smoothing is proposed to encourage the model to +be less confident, which replaces the ground-truth label $y$ with the weighted +sum of itself and some fixed distribution $\mu$, i.e. + +$$ + \tilde{y} = (1 - \epsilon) * y + \epsilon * \mu, +$$ + +where $(1 - \epsilon)$ and $\epsilon$ are the weights respectively, and +$\tilde{y}$ is the smoothed label. Usually uniform distribution is used for +$\mu$. This change in the ground-truth label is called label-smoothing +regularization or LSR. + +See more details about label smoothing in https://arxiv.org/abs/1512.00567. + +)DOC"); + } +}; + +class LabelSmoothGradOp : public framework::OperatorWithKernel { + public: + LabelSmoothGradOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) shouldn't be null."); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), + "Input(Out@GRAD) shouldn't be null."); + ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); + } +}; + +} // namespace operators +} // namespace paddle +namespace ops = paddle::operators; + +REGISTER_OP(label_smooth, ops::LabelSmoothOp, ops::LabelSmoothOpMaker, + label_smooth_grad, ops::LabelSmoothGradOp); +REGISTER_OP_CPU_KERNEL( + label_smooth, + ops::LabelSmoothKernel, + ops::LabelSmoothKernel); +REGISTER_OP_CPU_KERNEL( + label_smooth_grad, + ops::LabelSmoothGradKernel, + ops::LabelSmoothGradKernel); diff --git a/paddle/operators/label_smooth_op.cu b/paddle/operators/label_smooth_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..5a0cec12bc58a56e4b0c3bd6fbc6c4754ef81fa4 --- /dev/null +++ b/paddle/operators/label_smooth_op.cu @@ -0,0 +1,26 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/label_smooth_op.h" + +namespace ops = paddle::operators; + +REGISTER_OP_CUDA_KERNEL( + label_smooth, + ops::LabelSmoothKernel, + ops::LabelSmoothKernel); +REGISTER_OP_CUDA_KERNEL( + label_smooth_grad, + ops::LabelSmoothGradKernel, + ops::LabelSmoothGradKernel); diff --git a/paddle/operators/label_smooth_op.h b/paddle/operators/label_smooth_op.h new file mode 100644 index 0000000000000000000000000000000000000000..87bc9f793e3b4e249142710243c45d51f3a913b2 --- /dev/null +++ b/paddle/operators/label_smooth_op.h @@ -0,0 +1,66 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class LabelSmoothKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const { + auto* out_t = ctx.Output("Out"); + auto* in_t = ctx.Input("X"); + auto* dist_t = ctx.Input("PriorDist"); + auto label_dim = in_t->dims()[1]; + out_t->mutable_data(ctx.GetPlace()); + + auto epsilon = ctx.Attr("epsilon"); + auto out = framework::EigenVector::Flatten(*out_t); + auto in = framework::EigenVector::Flatten(*in_t); + auto& dev = *ctx.template device_context().eigen_device(); + if (dist_t) { + auto dist = framework::EigenVector::Flatten(*dist_t); + out.device(dev) = + static_cast(1 - epsilon) * in + + epsilon * dist.broadcast(Eigen::DSizes(in_t->numel())); + } else { + out.device(dev) = static_cast(1 - epsilon) * in + + static_cast(epsilon / label_dim); + } + } +}; + +template +class LabelSmoothGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const { + auto* d_out_t = ctx.Input(framework::GradVarName("Out")); + auto* d_in_t = ctx.Output(framework::GradVarName("X")); + d_in_t->mutable_data(ctx.GetPlace()); + + auto d_out = framework::EigenVector::Flatten(*d_out_t); + auto d_in = framework::EigenVector::Flatten(*d_in_t); + + auto epsilon = ctx.Attr("epsilon"); + auto& dev = *ctx.template device_context().eigen_device(); + d_in.device(dev) = static_cast(1 - epsilon) * d_out; + } +}; +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/layer_norm_op.cc b/paddle/operators/layer_norm_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..1c6d2ae4d05becaeed34d66cad398cc90f9d3ece --- /dev/null +++ b/paddle/operators/layer_norm_op.cc @@ -0,0 +1,370 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/layer_norm_op.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; +using DataLayout = framework::DataLayout; + +template +using EigenMatrixMapRowMajor = Eigen::Map< + Eigen::Matrix>; +template +using ConstEigenMatrixMapRowMajor = Eigen::Map< + const Eigen::Matrix>; + +class LayerNormOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of LayerNormOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Y"), + "Output(Y) of LayerNormOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Mean"), + "Output(Mean) of LayerNormOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Variance"), + "Output(Variance) of LayerNormOp should not be null."); + + auto x_dim = ctx->GetInputDim("X"); + auto begin_norm_axis = ctx->Attrs().Get("begin_norm_axis"); + PADDLE_ENFORCE_LT(begin_norm_axis, x_dim.size(), + "'begin_norm_axis' must be less than the rank of X."); + + auto matrix_dim = framework::flatten_to_2d(x_dim, begin_norm_axis); + int left = static_cast(matrix_dim[0]); + int right = static_cast(matrix_dim[1]); + if (ctx->HasInput("Scale")) { + PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale").size(), 1UL); + PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale")[0], right); + } + if (ctx->HasInput("Bias")) { + PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias").size(), 1UL); + PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias")[0], right); + } + + ctx->SetOutputDim("Y", ctx->GetInputDim("X")); + ctx->SetOutputDim("Mean", {left}); + ctx->SetOutputDim("Variance", {left}); + ctx->ShareLoD("X", "Y"); + } +}; + +class LayerNormOpMaker : public framework::OpProtoAndCheckerMaker { + public: + LayerNormOpMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "(LoDTensor) The input tensor."); + AddInput("Scale", + "(Tensor, optional) Scale is a 1-dimensional tensor of size " + "H(`begin_norm_axis` splits the tensor(`X`) to a matrix [N,H])." + "It is applied to the output.") + .AsDispensable(); + AddInput("Bias", + "(Tensor, optional) Bias is a 1-dimensional tensor of size " + "H(`begin_norm_axis` splits the tensor(`X`) to a matrix [N,H])." + "It is applied to the output.") + .AsDispensable(); + AddOutput("Y", "(LoDTensor) Result after normalization."); + AddOutput("Mean", "(Tensor) Mean of the current mini batch.") + .AsIntermediate(); + AddOutput("Variance", "(Tensor) Variance of the current mini batch.") + .AsIntermediate(); + + AddAttr("epsilon", + "(float, default 1e-5) Constant for " + "numerical stability") + .SetDefault(1e-5) + .AddCustomChecker([](const float &epsilon) { + PADDLE_ENFORCE(epsilon >= 0.0f && epsilon <= 0.001f, + "'epsilon' should be between 0.0 and 0.001."); + }); + AddAttr("begin_norm_axis", + "(int default:1), the " + "axis of `begin_norm_axis ... Rank(X) - 1` will be " + "normalized. `begin_norm_axis` splits the tensor(`X`) to a " + "matrix [N,H].") + .SetDefault(1) + .AddCustomChecker([](const int &begin_norm_axis) { + PADDLE_ENFORCE_GT(begin_norm_axis, 0, + "'begin_norm_axis' should be greater than zero."); + }); + + AddComment(R"DOC( +Layer Normalization. + +Layer Norm has been implemented as discussed in the paper: +https://arxiv.org/abs/1607.06450 +... +)DOC"); + } +}; + +template +class LayerNormKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + const float epsilon = ctx.Attr("epsilon"); + const auto *scale = ctx.Input("Scale"); + const auto *bias = ctx.Input("Bias"); + const auto *x = ctx.Input("X"); + const auto &x_dims = x->dims(); + const auto begin_norm_axis = ctx.Attr("begin_norm_axis"); + + auto *output = ctx.Output("Y"); + auto *mean = ctx.Output("Mean"); + auto *var = ctx.Output("Variance"); + output->mutable_data(ctx.GetPlace()); + mean->mutable_data(ctx.GetPlace()); + var->mutable_data(ctx.GetPlace()); + + auto matrix_dim = framework::flatten_to_2d(x_dims, begin_norm_axis); + int left = static_cast(matrix_dim[0]); + int right = static_cast(matrix_dim[1]); + + auto input_map = ConstEigenMatrixMapRowMajor(x->data(), left, right); + + auto mean_map = EigenMatrixMapRowMajor(mean->data(), left, 1); + auto var_map = EigenMatrixMapRowMajor(var->data(), left, 1); + auto output_map = EigenMatrixMapRowMajor(output->data(), left, right); + + auto squre = [](T ele) { return ele * ele; }; + auto add_epslion = [epsilon](T ele) { return ele + epsilon; }; + + mean_map = input_map.rowwise().mean(); + var_map = (input_map - mean_map.replicate(1, right)) + .unaryExpr(squre) + .rowwise() + .mean() + .unaryExpr(add_epslion); + + auto inv_std_func = [](T ele) { return std::sqrt(1 / ele); }; + // TODO(zcd): Some thinking about output_map, is it appropriate that + // `output_map` and `input_map` point to the same memory. + auto inv_std = var_map.unaryExpr(inv_std_func); + if (scale && bias) { + auto scale_map = + ConstEigenMatrixMapRowMajor(scale->data(), 1, right); + auto bias_map = ConstEigenMatrixMapRowMajor(bias->data(), 1, right); + output_map = (input_map - mean_map.replicate(1, right)) + .cwiseProduct(inv_std.replicate(1, right)) + .cwiseProduct(scale_map.replicate(left, 1)) + + bias_map.replicate(left, 1); + } else if (scale) { + auto scale_map = + ConstEigenMatrixMapRowMajor(scale->data(), 1, right); + output_map = (input_map - mean_map.replicate(1, right)) + .cwiseProduct(inv_std.replicate(1, right)) + .cwiseProduct(scale_map.replicate(left, 1)); + } else if (bias) { + auto bias_map = ConstEigenMatrixMapRowMajor(bias->data(), 1, right); + output_map = (input_map - mean_map.replicate(1, right)) + .cwiseProduct(inv_std.replicate(1, right)) + + bias_map.replicate(left, 1); + } else { + output_map = (input_map - mean_map.replicate(1, right)) + .cwiseProduct(inv_std.replicate(1, right)); + } + } +}; + +class LayerNormGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext *ctx) const override { + // check input + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of LayerNormOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Scale"), + "Input(Scale) of LayerNormOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Mean"), + "Input(Mean) of LayerNormOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Variance"), + "Input(Variance) of LayerNormOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Y")), + "Input(Y@GRAD) of LayerNormOp should not be null."); + + // check output + if (ctx->HasOutput(framework::GradVarName("X"))) { + ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); + } + if (ctx->HasOutput(framework::GradVarName("Scale"))) { + ctx->SetOutputDim(framework::GradVarName("Scale"), + ctx->GetInputDim("Scale")); + } + if (ctx->HasOutput(framework::GradVarName("Bias"))) { + ctx->SetOutputDim(framework::GradVarName("Bias"), + ctx->GetInputDim("Bias")); + } + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext &ctx) const override { + const auto *var = ctx.InputVar(framework::GradVarName("Y")); + if (var == nullptr) { + PADDLE_THROW("can't find Y@GRAD"); + } + const Tensor *t = nullptr; + if (var->IsType()) { + t = &var->Get(); + } else if (var->IsType()) { + t = &var->Get(); + } + if (t == nullptr) { + PADDLE_THROW("can't find Y@GRAD"); + } + return framework::OpKernelType(framework::ToDataType(t->type()), + ctx.GetPlace()); + } +}; + +template +class LayerNormGradKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + const auto *x = ctx.Input("X"); + const auto *mean = ctx.Input("Mean"); + const auto *var = ctx.Input("Variance"); + const auto *scale = ctx.Input("Scale"); + const auto *d_y = ctx.Input(framework::GradVarName("Y")); + + const auto &x_dims = x->dims(); + + const auto begin_norm_axis = ctx.Attr("begin_norm_axis"); + auto matrix_dim = framework::flatten_to_2d(x_dims, begin_norm_axis); + int left = static_cast(matrix_dim[0]); + int right = static_cast(matrix_dim[1]); + + // init output + auto *d_x = ctx.Output(framework::GradVarName("X")); + auto *d_scale = ctx.Output(framework::GradVarName("Scale")); + auto *d_bias = ctx.Output(framework::GradVarName("Bias")); + + auto x_map = ConstEigenMatrixMapRowMajor(x->data(), left, right); + auto d_y_map = ConstEigenMatrixMapRowMajor(d_y->data(), left, right); + auto mean_map = ConstEigenMatrixMapRowMajor(mean->data(), left, 1); + auto var_map = ConstEigenMatrixMapRowMajor(var->data(), left, 1); + + if (d_bias) { + d_bias->mutable_data(ctx.GetPlace()); + auto d_bias_map = EigenMatrixMapRowMajor(d_bias->data(), 1, right); + d_bias_map = d_y_map.colwise().sum(); + } + if (d_scale) { + d_scale->mutable_data(ctx.GetPlace()); + auto d_scale_map = + EigenMatrixMapRowMajor(d_scale->data(), 1, right); + auto inv_std_func = [](T ele) { return std::sqrt(1 / ele); }; + // There are two equation to compute d_scale. One uses "Y" and the other + // does not use "Y" + d_scale_map = + ((x_map - mean_map.replicate(1, right)) + .cwiseProduct( + var_map.unaryExpr(inv_std_func).replicate(1, right)) + .cwiseProduct(d_y_map)) + .colwise() + .sum(); + } + + if (d_x) { + d_x->mutable_data(ctx.GetPlace()); + auto d_x_map = EigenMatrixMapRowMajor(d_x->data(), left, right); + auto triple_product_func = [](T ele) { return ele * ele * ele; }; + auto inv_std_func = [](T ele) { return std::sqrt(1 / ele); }; + // TODO(zcd): these code can be refined + if (d_scale) { + auto scale_map = + ConstEigenMatrixMapRowMajor(scale->data(), 1, right); + // dy_dx + auto dx_end = var_map.unaryExpr(inv_std_func) + .replicate(1, right) + .cwiseProduct(d_y_map) + .cwiseProduct(scale_map.replicate(left, 1)); + // dy_dmean_dx + auto dx_mean = (T(-1.0) / right) * + var_map.unaryExpr(inv_std_func) + .replicate(1, right) + .cwiseProduct(d_y_map) + .cwiseProduct(scale_map.replicate(left, 1)) + .rowwise() + .sum() + .replicate(1, right); + // dy_var_dx + auto dvar_end_part = (x_map - mean_map.replicate(1, right)) + .cwiseProduct(scale_map.replicate(left, 1)) + .cwiseProduct(d_y_map) + .rowwise() + .sum(); + auto dvar_end = var_map.unaryExpr(inv_std_func) + .unaryExpr(triple_product_func) + .cwiseProduct(dvar_end_part) + .replicate(1, right); + auto dx_var = + (T(-1.0) / right) * + (x_map - mean_map.replicate(1, right)).cwiseProduct(dvar_end); + + d_x_map = dx_end + dx_mean + dx_var; + } else { + // dy_dx + auto dx_end = var_map.unaryExpr(inv_std_func) + .replicate(1, right) + .cwiseProduct(d_y_map); + // dy_dmean_dx + auto dx_mean = (T(-1.0) / right) * + var_map.unaryExpr(inv_std_func) + .replicate(1, right) + .cwiseProduct(d_y_map) + .rowwise() + .sum() + .replicate(1, right); + // dy_var_dx + auto dvar_end_part = (x_map - mean_map.replicate(1, right)) + .cwiseProduct(d_y_map) + .rowwise() + .sum(); + auto dvar_end = var_map.unaryExpr(inv_std_func) + .unaryExpr(triple_product_func) + .cwiseProduct(dvar_end_part) + .replicate(1, right); + auto dx_var = + (T(-1.0) / right) * + (x_map - mean_map.replicate(1, right)).cwiseProduct(dvar_end); + + d_x_map = dx_end + dx_mean + dx_var; + } + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(layer_norm, ops::LayerNormOp, ops::LayerNormOpMaker, + layer_norm_grad, ops::LayerNormGradOp); +REGISTER_OP_CPU_KERNEL( + layer_norm, + ops::LayerNormKernel); +REGISTER_OP_CPU_KERNEL( + layer_norm_grad, + ops::LayerNormGradKernel); diff --git a/paddle/operators/layer_norm_op.h b/paddle/operators/layer_norm_op.h new file mode 100644 index 0000000000000000000000000000000000000000..bca35b91e6f52d35dee14aac9d080b52914942e3 --- /dev/null +++ b/paddle/operators/layer_norm_op.h @@ -0,0 +1,35 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class LayerNormKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override; +}; + +template +class LayerNormGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override; +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/load_combine_op.cc b/paddle/operators/load_combine_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..f4be793d7bf1f346c011842c57fb5b5179a697d6 --- /dev/null +++ b/paddle/operators/load_combine_op.cc @@ -0,0 +1,108 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#include + +#include "paddle/framework/op_registry.h" +#include "paddle/platform/device_context.h" + +namespace paddle { +namespace operators { + +class LoadCombineOp : public framework::OperatorBase { + public: + LoadCombineOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + void Run(const framework::Scope &scope, + const platform::Place &place) const override { + auto filename = Attr("file_path"); + + std::ifstream fin(filename); + PADDLE_ENFORCE(static_cast(fin), + "Cannot open file %s for load_combine op", filename); + + auto out_var_names = Outputs("Out"); + PADDLE_ENFORCE_GT( + static_cast(out_var_names.size()), 0, + "The number of output variables should be greater than 0."); + + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + + for (size_t i = 0; i < out_var_names.size(); i++) { + auto *out_var = scope.FindVar(out_var_names[i]); + + PADDLE_ENFORCE(out_var != nullptr, "Output variable %s cannot be found", + out_var_names[i]); + + auto *tensor = out_var->GetMutable(); + + // Error checking + PADDLE_ENFORCE(static_cast(fin), "Cannot read more from file %s", + filename); + + // Get data from fin to tensor + DeserializeFromStream(fin, tensor, dev_ctx); + + if (platform::is_gpu_place(place)) { + // copy CPU to GPU + framework::LoDTensor cpu_tensor; + cpu_tensor.ShareDataWith(*tensor); + cpu_tensor.set_lod(tensor->lod()); + + // reset tensor + out_var->Clear(); + tensor = out_var->GetMutable(); + tensor->set_lod(cpu_tensor.lod()); + Copy(cpu_tensor, place, dev_ctx, tensor); + } + } + } +}; + +class LoadCombineOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + LoadCombineOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddOutput( + "Out", + "(vector) The output LoDTensors that will be read from the input file.") + .AsDuplicable(); + AddAttr("file_path", + "(string) " + "LoDTensors will be loaded from \"file_path\".") + .AddCustomChecker( + [](const std::string &path) { return !path.empty(); }); + AddComment(R"DOC( +LoadCombine Operator. + +LoadCombine operator loads LoDTensor variables from a file. The file should +contain one or more LoDTensors serialized using the SaveCombine operator. The +LoadCombine operator applies a deserialization strategy to appropriately load +the LodTensors, and this strategy complements the serialization strategy used +in the SaveCombine operator. Hence, the LoadCombine operator is tightly coupled +with the SaveCombine operator, and can only deserialize one or more LoDTensors +that were saved using the SaveCombine operator. + +)DOC"); + } +}; +} // namespace operators +} // namespace paddle +namespace ops = paddle::operators; + +REGISTER_OPERATOR(load_combine, ops::LoadCombineOp, + ops::LoadCombineOpProtoMaker); diff --git a/paddle/operators/lookup_table_op.cu b/paddle/operators/lookup_table_op.cu index d97390fa1c53fa0bdf16ab34cb209b994621f83c..07372808bbf078bd2e9b0bb5782b95a046253f46 100644 --- a/paddle/operators/lookup_table_op.cu +++ b/paddle/operators/lookup_table_op.cu @@ -125,8 +125,8 @@ class LookupTableGradCUDAKernel : public framework::OpKernel { new_rows.resize(ids_dim[0]); auto gpu_place = boost::get(context.GetPlace()); - memory::Copy(platform::CPUPlace(), new_rows.data(), gpu_place, ids_data, - ids_dim[0] * sizeof(int64_t), stream); + memory::Copy(platform::CPUPlace(), new_rows.cuda_data(), gpu_place, + ids_data, ids_dim[0] * sizeof(int64_t), stream); d_table->set_rows(new_rows); diff --git a/paddle/operators/lstm_op.h b/paddle/operators/lstm_op.h index c57ee414dc5b3417549c8ac3a7fd57a9c8f452df..72e95b75e29c88c5944607ceaa40435bac7a745c 100644 --- a/paddle/operators/lstm_op.h +++ b/paddle/operators/lstm_op.h @@ -27,11 +27,12 @@ using Tensor = framework::Tensor; template inline void ReorderInitState(const DeviceContext& ctx, - const framework::Tensor& src, const size_t* index, + const framework::Tensor& src, + framework::Vector index_lod, framework::Tensor* dst, bool indexed_src) { math::CopyMatrixRowsFunctor row_shuffle; dst->mutable_data(src.dims(), ctx.GetPlace()); - row_shuffle(ctx, src, index, *dst, indexed_src); + row_shuffle(ctx, src, index_lod, *dst, indexed_src); } template @@ -84,7 +85,9 @@ class LSTMKernel : public framework::OpKernel { } lstm_value.prev_state_value = nullptr; Tensor ordered_c0; - const size_t* order = batch_gate->lod()[2].data(); + + framework::Vector order(batch_gate->lod()[2]); + if (cell_t0) { // Since the batch computing for LSTM reorders the input sequence // according to their length. The initialized cell state also needs @@ -202,7 +205,8 @@ class LSTMGradKernel : public framework::OpKernel { // ordered_h0_g/c0_g is the reordered gradient of hidden/cell // initialization. Tensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g; - const size_t* order = batch_gate->lod()[2].data(); + framework::Vector order(batch_gate->lod()[2]); + if (c0) { ReorderInitState(device_ctx, *c0, order, &ordered_c0, true); diff --git a/paddle/operators/lstmp_op.h b/paddle/operators/lstmp_op.h index ee82d5c10a5421b181e525f49a263d4808ede62f..e064a155dfadd8104fa80727a962cb2e24ade29f 100644 --- a/paddle/operators/lstmp_op.h +++ b/paddle/operators/lstmp_op.h @@ -34,7 +34,8 @@ using EigenMatrix = framework::EigenMatrix; template inline void ReorderInitState(const DeviceContext& ctx, - const framework::Tensor& src, const size_t* index, + const framework::Tensor& src, + framework::Vector index, framework::Tensor* dst, bool indexed_src) { math::CopyMatrixRowsFunctor row_shuffle; dst->mutable_data(src.dims(), ctx.GetPlace()); @@ -109,7 +110,9 @@ class LSTMPKernel : public framework::OpKernel { } lstmp_value.prev_state_value = nullptr; Tensor ordered_c0; - const size_t* order = batch_gate->lod()[2].data(); + + framework::Vector order(batch_gate->lod()[2]); + if (cell_t0) { // Since the batch computing for LSTMP reorders the input sequence // according to their length. The initialized cell state also needs @@ -275,7 +278,9 @@ class LSTMPGradKernel : public framework::OpKernel { // ordered_h0_g/c0_g is the reordered gradient of hidden/cell // initialization. Tensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g; - const size_t* order = batch_gate->lod()[2].data(); + + framework::Vector order(batch_gate->lod()[2]); + if (c0) { ReorderInitState(device_ctx, *c0, order, &ordered_c0, true); diff --git a/paddle/operators/math/selected_rows_functor.cu b/paddle/operators/math/selected_rows_functor.cu index 0ee456f9bc61436bd0f2f8ef20dd1654e7e56d56..acdd87cb3550bc5f3891aed6fefd4301a3395f9f 100644 --- a/paddle/operators/math/selected_rows_functor.cu +++ b/paddle/operators/math/selected_rows_functor.cu @@ -31,7 +31,7 @@ struct SelectedRowsAdd { PADDLE_ENFORCE_EQ(in1_height, input2.height()); output->set_height(in1_height); - auto& in1_rows = input1.rows(); + framework::Vector in1_rows(input1.rows()); auto& in2_rows = input2.rows(); std::vector out_rows; out_rows.reserve(in1_rows.size() + in2_rows.size()); @@ -108,7 +108,7 @@ struct SelectedRowsAddTensor { PADDLE_ENFORCE_EQ(in1_height, out_dims[0]); auto& in1_value = input1.value(); - auto& in1_rows = input1.rows(); + framework::Vector in1_rows(input1.rows()); int64_t in1_row_numel = in1_value.numel() / in1_rows.size(); PADDLE_ENFORCE_EQ(in1_row_numel, input2.numel() / in1_height); @@ -126,7 +126,7 @@ struct SelectedRowsAddTensor { dim3 grid(1, in1_rows.size()); SelectedRowsAddTensorKernel< T, block_size><<>>( - in1_data, in1_rows.data(), out_data, in1_row_numel); + in1_data, in1_rows.cuda_data(), out_data, in1_row_numel); auto out_eigen = framework::EigenVector::Flatten(*output); auto in2_eigen = framework::EigenVector::Flatten(input2); @@ -146,7 +146,7 @@ struct SelectedRowsAddTo { auto in1_height = input1.height(); PADDLE_ENFORCE_EQ(in1_height, input2->height()); - auto& in1_rows = input1.rows(); + framework::Vector in1_rows(input1.rows()); auto& in2_rows = *(input2->mutable_rows()); auto& in1_value = input1.value(); @@ -204,7 +204,7 @@ struct SelectedRowsAddToTensor { PADDLE_ENFORCE_EQ(in1_height, in2_dims[0]); auto& in1_value = input1.value(); - auto& in1_rows = input1.rows(); + framework::Vector in1_rows(input1.rows()); int64_t in1_row_numel = in1_value.numel() / in1_rows.size(); PADDLE_ENFORCE_EQ(in1_row_numel, input2->numel() / in1_height); @@ -216,7 +216,7 @@ struct SelectedRowsAddToTensor { dim3 grid(1, in1_rows.size()); SelectedRowsAddToTensorKernel< T, block_size><<>>( - in1_data, in1_rows.data(), in2_data, in1_row_numel); + in1_data, in1_rows.cuda_data(), in2_data, in1_row_numel); } }; @@ -257,7 +257,7 @@ struct MergeAdd { framework::SelectedRows operator()(const platform::CUDADeviceContext& context, const framework::SelectedRows& input) { framework::SelectedRows out; - auto input_rows = input.rows(); + framework::Vector input_rows(input.rows()); std::set row_set(input_rows.begin(), input_rows.end()); std::vector merge_rows(row_set.begin(), row_set.end()); @@ -283,9 +283,9 @@ struct MergeAdd { MergeAddKernel< T, 256><<(context) - .stream()>>>(input_data, input.rows().data(), out_data, - out.rows().data(), out.rows().size(), - input_width); + .stream()>>>(input_data, input_rows.cuda_data(), out_data, + out.mutable_rows()->cuda_data(), + out.rows().size(), input_width); return out; } }; @@ -370,8 +370,8 @@ struct UpdateToTensor { dim3 threads(platform::PADDLE_CUDA_NUM_THREADS, 1); dim3 grid(1, in1_rows.size()); UpdateToTensorKernel<<< - grid, threads, 0, context.stream()>>>(in1_data, in1_rows.data(), op, - in2_data, in1_row_numel); + grid, threads, 0, context.stream()>>>(in1_data, in1_rows.cuda_data(), + op, in2_data, in1_row_numel); } }; } // namespace scatter diff --git a/paddle/operators/math/sequence2batch.cc b/paddle/operators/math/sequence2batch.cc index e459a42ca251a9fc79f745f48a118ce898a0f77e..17abce1c2f809f75edb2c5dc46709094c2ce10c3 100644 --- a/paddle/operators/math/sequence2batch.cc +++ b/paddle/operators/math/sequence2batch.cc @@ -23,8 +23,10 @@ template class CopyMatrixRowsFunctor { public: void operator()(const platform::CPUDeviceContext& context, - const framework::Tensor& src, const size_t* index, - framework::Tensor& dst, bool is_src_index) { + const framework::Tensor& src, + framework::Vector index_lod, framework::Tensor& dst, + bool is_src_index) { + size_t* index = index_lod.data(); auto src_dims = src.dims(); auto dst_dims = dst.dims(); PADDLE_ENFORCE_EQ(src_dims.size(), 2UL, diff --git a/paddle/operators/math/sequence2batch.cu b/paddle/operators/math/sequence2batch.cu index 452ae8951000872b706f7e4227a62dbf98109e7e..f27631271a42b4d64abef00d7f119b85e32edda4 100644 --- a/paddle/operators/math/sequence2batch.cu +++ b/paddle/operators/math/sequence2batch.cu @@ -42,8 +42,10 @@ template class CopyMatrixRowsFunctor { public: void operator()(const platform::CUDADeviceContext& context, - const framework::Tensor& src, const size_t* index, - framework::Tensor& dst, bool is_src_index) { + const framework::Tensor& src, + framework::Vector index_lod, framework::Tensor& dst, + bool is_src_index) { + size_t* index = index_lod.cuda_data(); auto src_dims = src.dims(); auto dst_dims = dst.dims(); PADDLE_ENFORCE_EQ(src_dims.size(), 2, diff --git a/paddle/operators/math/sequence2batch.h b/paddle/operators/math/sequence2batch.h index a5c43a2c7d4d729c35a20a27de2a23141e6019bc..6db0427b4174a09dd254d771e8d3d215cc6571a9 100644 --- a/paddle/operators/math/sequence2batch.h +++ b/paddle/operators/math/sequence2batch.h @@ -35,7 +35,7 @@ class CopyMatrixRowsFunctor { // copy the input src to the indexed rows of output dst. // The indexed rows are based on the input index. void operator()(const DeviceContext& context, const framework::Tensor& src, - const size_t* index, framework::Tensor& dst, + framework::Vector index_lod, framework::Tensor& dst, bool is_src_index); }; @@ -66,7 +66,7 @@ class LoDTensor2BatchFunctor { PADDLE_ENFORCE_EQ(lods[1].size(), static_cast(lod_tensor.dims()[0])); CopyMatrixRowsFunctor to_batch; - to_batch(context, lod_tensor, lods[1].data(), batch, true); + to_batch(context, lod_tensor, lods[1], batch, true); return; } @@ -144,7 +144,7 @@ class LoDTensor2BatchFunctor { batch.set_lod(batch_lods); CopyMatrixRowsFunctor to_batch; - to_batch(context, lod_tensor, seq2batch_idx, batch, true); + to_batch(context, lod_tensor, batch_lods[1], batch, true); } }; @@ -159,8 +159,7 @@ class Batch2LoDTensorFunctor { PADDLE_ENFORCE_EQ(in_lod[1].size(), static_cast(lod_tensor.dims()[0])); CopyMatrixRowsFunctor to_seq; - size_t* index = in_lod[1].data(); - to_seq(context, batch, index, lod_tensor, false); + to_seq(context, batch, in_lod[1], lod_tensor, false); } }; diff --git a/paddle/operators/math/sequence_padding.cu b/paddle/operators/math/sequence_padding.cu index a38df26f59569c4fd54a1ba5691b2cd5f3245344..65c9cfe4a0ec14d220ad237baa71703a783ed0fa 100644 --- a/paddle/operators/math/sequence_padding.cu +++ b/paddle/operators/math/sequence_padding.cu @@ -120,12 +120,14 @@ class PaddingLoDTensorFunctor { T* padding_data = padding.data(); if (norm_by_times) { SequencePaddingKernel<<>>( - padding_data, const_cast(seq_data), abs_offset_lod[level].data(), - sequence_width, max_sequence_length, num_sequences); + padding_data, const_cast(seq_data), + abs_offset_lod[level].cuda_data(), sequence_width, + max_sequence_length, num_sequences); } else { SequencePaddingKernel<<>>( - padding_data, const_cast(seq_data), abs_offset_lod[level].data(), - sequence_width, max_sequence_length, num_sequences); + padding_data, const_cast(seq_data), + abs_offset_lod[level].cuda_data(), sequence_width, + max_sequence_length, num_sequences); } } }; @@ -193,12 +195,14 @@ class UnpaddingLoDTensorFunctor { T* seq_data = seq.data(); if (norm_by_times) { SequencePaddingKernel<<>>( - const_cast(padding_data), seq_data, abs_offset_lod[level].data(), - sequence_width, max_sequence_length, num_sequences); + const_cast(padding_data), seq_data, + abs_offset_lod[level].cuda_data(), sequence_width, + max_sequence_length, num_sequences); } else { SequencePaddingKernel<<>>( - const_cast(padding_data), seq_data, abs_offset_lod[level].data(), - sequence_width, max_sequence_length, num_sequences); + const_cast(padding_data), seq_data, + abs_offset_lod[level].cuda_data(), sequence_width, + max_sequence_length, num_sequences); } } }; diff --git a/paddle/operators/math/sequence_pooling.cu b/paddle/operators/math/sequence_pooling.cu index 4c9e6b375ce7251747b9cd443d86cca0858c84ef..f66534a6812a66c737445ea96914a393077d7d65 100644 --- a/paddle/operators/math/sequence_pooling.cu +++ b/paddle/operators/math/sequence_pooling.cu @@ -73,7 +73,7 @@ class MaxSeqPoolFunctor { dim3 grid(num_seq, 1); auto stream = context.stream(); KeMaxSequencePool<<>>( - in_data, starts.data(), out_data, max_index, num_seq, dim); + in_data, starts.cuda_data(), out_data, max_index, num_seq, dim); } }; diff --git a/paddle/operators/math/sequence_scale.cu b/paddle/operators/math/sequence_scale.cu index ceaabd8e0fd81c927fbd4333c0aa7954b8da8513..fd4e28f6113729cd1fa9dc179bd9b601d29b8a7f 100644 --- a/paddle/operators/math/sequence_scale.cu +++ b/paddle/operators/math/sequence_scale.cu @@ -46,7 +46,7 @@ class ScaleLoDTensorFunctor { SequenceScaleKernel<<< num_seq, PADDLE_CUDA_NUM_THREADS, 0, context.stream()>>>( - seq_data, abs_offset_lod[level].data(), scales, seq_width); + seq_data, abs_offset_lod[level].cuda_data(), scales, seq_width); } }; diff --git a/paddle/operators/reduce_op.cc b/paddle/operators/reduce_op.cc index 4a06babeda00f2420df80f81f876a0047a3285ef..84f24a909597915f0eebb6c9cad37510cbe93e7b 100644 --- a/paddle/operators/reduce_op.cc +++ b/paddle/operators/reduce_op.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/operators/reduce_op.h" -#include "paddle/operators/net_op.h" namespace paddle { namespace operators { @@ -38,10 +37,14 @@ class ReduceOp : public framework::OperatorWithKernel { dim, x_rank, "The dim should be in the range [-rank(input), rank(input))."); bool reduce_all = ctx->Attrs().Get("reduce_all"); + bool keep_dim = ctx->Attrs().Get("keep_dim"); if (reduce_all) { - ctx->SetOutputDim("Out", {1}); + if (keep_dim) + ctx->SetOutputDim( + "Out", framework::make_ddim(std::vector(x_rank, 1))); + else + ctx->SetOutputDim("Out", {1}); } else { - bool keep_dim = ctx->Attrs().Get("keep_dim"); auto dims_vector = vectorize(x_dims); if (keep_dim || x_rank == 1) { dims_vector[dim] = 1; diff --git a/paddle/operators/row_conv_op.cu b/paddle/operators/row_conv_op.cu index 41f2c5b9de91ade15b4010f56377675cfd1b611c..b3825212e1ac41b13a2f4cad2c128da39c5f6e71 100644 --- a/paddle/operators/row_conv_op.cu +++ b/paddle/operators/row_conv_op.cu @@ -307,7 +307,7 @@ class RowConvKernel int input_dim = X->dims()[1]; int num_sequence = batch_indices.size() - 1; int future_context = Filter->dims()[0]; - size_t *idx = batch_indices.data(); + size_t *idx = batch_indices.cuda_data(); auto stream = context.cuda_device_context().stream(); if (future_context <= 32) { @@ -345,7 +345,7 @@ class RowConvGradKernel int input_dim = X->dims()[1]; int num_sequence = batch_indices.size() - 1; int future_context = Filter->dims()[0]; - size_t *idx = batch_indices.data(); + size_t *idx = batch_indices.cuda_data(); auto &device_ctx = context.cuda_device_context(); math::SetConstant zero; diff --git a/paddle/operators/save_combine_op.cc b/paddle/operators/save_combine_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..bffa2908bc42d73332f22fa3706d24ab49cd4b38 --- /dev/null +++ b/paddle/operators/save_combine_op.cc @@ -0,0 +1,141 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include +#include +#include +#include "paddle/framework/data_type.h" +#include "paddle/framework/framework.pb.h" +#include "paddle/framework/lod_tensor.h" +#include "paddle/framework/op_registry.h" +#include "paddle/platform/device_context.h" + +namespace paddle { +namespace operators { + +// TODO(sidgoyal78): These function are needed by other files (save_op), move +// them to paddle::filesystem namespace. (as noted by yuyang18 in save_op). +constexpr char kSEP = '/'; +static bool FileExists(const std::string &filepath) { + struct stat buffer; + return (stat(filepath.c_str(), &buffer) == 0); +} + +static std::string DirName(const std::string &filepath) { + auto pos = filepath.rfind(kSEP); + if (pos == std::string::npos) { + return ""; + } + return filepath.substr(0, pos); +} + +static void MkDir(const char *path) { + if (mkdir(path, 0755)) { + PADDLE_ENFORCE_EQ(errno, EEXIST, "%s mkdir failed!", path); + } +} + +static void MkDirRecursively(const char *fullpath) { + if (*fullpath == '\0') return; // empty string + if (FileExists(fullpath)) return; + + MkDirRecursively(DirName(fullpath).c_str()); + MkDir(fullpath); +} + +class SaveCombineOp : public framework::OperatorBase { + public: + SaveCombineOp(const std::string &type, + const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + void Run(const framework::Scope &scope, + const platform::Place &place) const override { + auto filename = Attr("file_path"); + auto overwrite = Attr("overwrite"); + + bool is_present = FileExists(filename); + if (is_present && !overwrite) { + PADDLE_THROW("%s exists!, cannot save_combine to it when overwrite=false", + filename, overwrite); + } + + MkDirRecursively(DirName(filename).c_str()); + std::ofstream fout(filename); + PADDLE_ENFORCE(static_cast(fout), "Cannot open %s to write", + filename); + + auto inp_var_names = Inputs("X"); + PADDLE_ENFORCE_GT(static_cast(inp_var_names.size()), 0, + "The number of input variables should be greater than 0"); + + // get device context from pool + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + + for (size_t i = 0; i < inp_var_names.size(); i++) { + auto *var = scope.FindVar(inp_var_names[i]); + + PADDLE_ENFORCE(var != nullptr, + "Cannot find variable %s for save_combine_op", + inp_var_names[i]); + PADDLE_ENFORCE(var->IsType(), + "SaveCombineOp only supports LoDTensor, %s has wrong type", + inp_var_names[i]); + + auto &tensor = var->Get(); + // Serialize tensor + framework::SerializeToStream(fout, tensor, dev_ctx); + } + fout.close(); + } +}; + +class SaveCombineOpProtoMaker : public framework::OpProtoAndCheckerMaker { + public: + SaveCombineOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput( + "X", + "(vector) Input LoDTensors that need to be saved together in a file.") + .AsDuplicable(); + AddComment(R"DOC( +SaveCombine operator + +This operator will serialize and write a list of input LoDTensor variables +to a file on disk. +)DOC"); + AddAttr("overwrite", + "(boolean, default true)" + "Overwrite the output file if it exists.") + .SetDefault(true); + AddAttr( + "file_path", + "(string)" + "The \"file_path\" where the LoDTensor variables will be saved.") + .AddCustomChecker( + [](const std::string &path) { return !path.empty(); }); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OPERATOR(save_combine, ops::SaveCombineOp, + ops::SaveCombineOpProtoMaker); diff --git a/paddle/operators/save_load_combine_op_test.cc b/paddle/operators/save_load_combine_op_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..f3ddc4a6c55d72e4e444869a1ebcd7662c892317 --- /dev/null +++ b/paddle/operators/save_load_combine_op_test.cc @@ -0,0 +1,180 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include +#include "gtest/gtest.h" +#include "paddle/framework/op_registry.h" + +USE_NO_KERNEL_OP(save_combine); +USE_NO_KERNEL_OP(load_combine); + +int* CreateForSaveCombineOp(int x, int y, const std::vector& lod_info, + std::string var_name, + paddle::platform::CPUPlace& place, + paddle::framework::Scope& scope, + paddle::framework::LoD& expect_lod) { + auto var = scope.Var(var_name); + auto tensor = var->GetMutable(); + tensor->Resize({x, y}); + expect_lod.resize(1); + for (size_t i = 0; i < lod_info.size(); i++) { + expect_lod[0].push_back(lod_info[i]); + } + tensor->set_lod(expect_lod); + int* expect = tensor->mutable_data(place); + for (int64_t i = 0; i < tensor->numel(); ++i) { + expect[i] = static_cast(i); + } + return expect; +} + +paddle::framework::LoDTensor* GeneratePlaceholderBeforeLoad( + const std::string out_var_name, paddle::framework::Scope& scope) { + auto load_var = scope.Var(out_var_name); + auto target = load_var->GetMutable(); + return target; +} + +int* GetValuesAfterLoadCombineOp(paddle::framework::LoDTensor* target, + paddle::framework::Scope& scope, + paddle::framework::LoD& actual_lod) { + int* actual = target->data(); + actual_lod = target->lod(); + return actual; +} + +void CheckValues(int* expect, int* actual, paddle::framework::LoD expect_lod, + paddle::framework::LoD actual_lod, const int& numel) { + for (int64_t i = 0; i < numel; ++i) { + EXPECT_EQ(expect[i], actual[i]); + } + EXPECT_EQ(expect_lod.size(), actual_lod.size()); + for (size_t i = 0; i < expect_lod.size(); ++i) { + for (size_t j = 0; j < expect_lod[i].size(); ++j) { + EXPECT_EQ(expect_lod[i][j], actual_lod[i][j]); + } + } +} + +// Here, we create 4 LoDTensors and use save_combine_op to first save these +// in a single file. Then, we use load_combine_op to load these sequentially +TEST(SaveLoadCombineOp, CPU) { + paddle::framework::Scope scope; + paddle::platform::CPUPlace place; + + std::vector lod1 = {0, 1, 2, 3, 10}; + int numel1 = 100; + paddle::framework::LoD expect_lod1; + int* expect1 = CreateForSaveCombineOp(10, 10, lod1, "test_var1", place, scope, + expect_lod1); + + std::vector lod2 = {0, 2, 5, 10}; + int numel2 = 200; + paddle::framework::LoD expect_lod2; + int* expect2 = CreateForSaveCombineOp(10, 20, lod2, "test_var2", place, scope, + expect_lod2); + + std::vector lod3 = {0, 2, 3, 20}; + int numel3 = 4000; + paddle::framework::LoD expect_lod3; + int* expect3 = CreateForSaveCombineOp(20, 200, lod3, "test_var3", place, + scope, expect_lod3); + + std::vector lod4 = {0, 1, 20}; + int numel4 = 1000; + paddle::framework::LoD expect_lod4; + int* expect4 = CreateForSaveCombineOp(20, 50, lod4, "test_var4", place, scope, + expect_lod4); + + // Set attributes + std::string filename = "check_tensor.ls"; + paddle::framework::AttributeMap attrs; + attrs.insert({"file_path", std::string(filename)}); + + // Run the save_combine_op + auto save_combine_op = paddle::framework::OpRegistry::CreateOp( + "save_combine", + {{"X", {"test_var1", "test_var2", "test_var3", "test_var4"}}}, {}, attrs); + save_combine_op->Run(scope, place); + + // Set up output vars + auto target1 = GeneratePlaceholderBeforeLoad("out_var1", scope); + auto target2 = GeneratePlaceholderBeforeLoad("out_var2", scope); + auto target3 = GeneratePlaceholderBeforeLoad("out_var3", scope); + auto target4 = GeneratePlaceholderBeforeLoad("out_var4", scope); + + // Run the load_combine_op + auto load_combine_op = paddle::framework::OpRegistry::CreateOp( + "load_combine", {}, + {{"Out", {"out_var1", "out_var2", "out_var3", "out_var4"}}}, attrs); + load_combine_op->Run(scope, place); + + paddle::framework::LoD actual_lod1, actual_lod2, actual_lod3, actual_lod4; + int* actual1 = GetValuesAfterLoadCombineOp(target1, scope, actual_lod1); + int* actual2 = GetValuesAfterLoadCombineOp(target2, scope, actual_lod2); + int* actual3 = GetValuesAfterLoadCombineOp(target3, scope, actual_lod3); + int* actual4 = GetValuesAfterLoadCombineOp(target4, scope, actual_lod4); + + CheckValues(expect1, actual1, expect_lod1, actual_lod1, numel1); + CheckValues(expect2, actual2, expect_lod2, actual_lod2, numel2); + CheckValues(expect3, actual3, expect_lod3, actual_lod3, numel3); + CheckValues(expect4, actual4, expect_lod4, actual_lod4, numel4); +} + +// Test with original SaveLoadTest +TEST(SaveLoadTestWithCombineOp, CPU) { + paddle::framework::Scope scope; + paddle::platform::CPUPlace place; + + auto var = scope.Var("test_var"); + auto tensor = var->GetMutable(); + tensor->Resize({3, 10}); + paddle::framework::LoD expect_lod; + expect_lod.resize(1); + expect_lod[0].push_back(0); + expect_lod[0].push_back(1); + expect_lod[0].push_back(2); + expect_lod[0].push_back(3); + + tensor->set_lod(expect_lod); + int* expect = tensor->mutable_data(place); + for (int64_t i = 0; i < tensor->numel(); ++i) { + expect[i] = static_cast(i); + } + paddle::framework::AttributeMap attrs; + attrs.insert({"file_path", std::string("check_t.save")}); + + auto save_op = paddle::framework::OpRegistry::CreateOp( + "save_combine", {{"X", {"test_var"}}}, {}, attrs); + save_op->Run(scope, place); + + auto load_var = scope.Var("out_var"); + auto target = load_var->GetMutable(); + auto load_op = paddle::framework::OpRegistry::CreateOp( + "load_combine", {}, {{"Out", {"out_var"}}}, attrs); + load_op->Run(scope, place); + int* actual = target->data(); + for (int64_t i = 0; i < tensor->numel(); ++i) { + EXPECT_EQ(expect[i], actual[i]); + } + auto& actual_lod = target->lod(); + EXPECT_EQ(expect_lod.size(), actual_lod.size()); + for (size_t i = 0; i < expect_lod.size(); ++i) { + for (size_t j = 0; j < expect_lod[i].size(); ++j) { + EXPECT_EQ(expect_lod[i][j], actual_lod[i][j]); + } + } +} diff --git a/paddle/operators/save_load_op_test.cc b/paddle/operators/save_load_op_test.cc index 40103d864fb58804b39ca5f3c63e802a430ce886..d829d5da174b73613da9dcfcd308a5b05e12bce9 100644 --- a/paddle/operators/save_load_op_test.cc +++ b/paddle/operators/save_load_op_test.cc @@ -24,7 +24,7 @@ TEST(SaveLoadOp, CPU) { auto var = scope.Var("test_var"); auto tensor = var->GetMutable(); - tensor->Resize({10, 10}); + tensor->Resize({3, 10}); paddle::framework::LoD expect_lod; expect_lod.resize(1); expect_lod[0].push_back(0); diff --git a/paddle/operators/send_op.cc b/paddle/operators/send_op.cc index 51e6b9de58b64f6aa9d86d037abe907bba37d3ff..291d19ba8bd6113fb69719d5385792ad5cde387d 100644 --- a/paddle/operators/send_op.cc +++ b/paddle/operators/send_op.cc @@ -42,17 +42,25 @@ class SendOp : public framework::OperatorBase { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); auto& ctx = *pool.Get(place); + + auto client_var_name = Output("RPCClient"); + PADDLE_ENFORCE_NOT_NULL(scope.FindVar(client_var_name), + "Can not find variable '%s' in the scope.", + client_var_name); + auto* client_var = scope.FindVar(client_var_name); + detail::RPCClient* rpc_client = client_var->GetMutable(); + for (size_t i = 0; i < ins.size(); i++) { VLOG(3) << "sending " << ins[i] << " to " << epmap[i]; - client_.AsyncSendVariable(epmap[i], ctx, scope, ins[i]); + rpc_client->AsyncSendVariable(epmap[i], ctx, scope, ins[i]); } - PADDLE_ENFORCE(client_.Wait()); + PADDLE_ENFORCE(rpc_client->Wait()); for (auto& ep : endpoints) { VLOG(3) << "batch barrier, ep: " << ep; - client_.AsyncSendBatchBarrier(ep); + rpc_client->AsyncSendBatchBarrier(ep); } - PADDLE_ENFORCE(client_.Wait()); + PADDLE_ENFORCE(rpc_client->Wait()); if (outs.size() > 0) { for (size_t i = 0; i < outs.size(); i++) { @@ -62,11 +70,6 @@ class SendOp : public framework::OperatorBase { PADDLE_ENFORCE(client_.Wait()); } } - - private: - // TODO(typhoonzero): put RPCClient in a Variable, so that - // send and recv can use the same connection. - mutable detail::RPCClient client_; }; class SendOpMaker : public framework::OpProtoAndCheckerMaker { @@ -76,6 +79,9 @@ class SendOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("X", "(Tensor) Input tensor to be sent").AsDuplicable(); AddOutput("Out", "(Tensor) Output tensor to be received from server") .AsDuplicable(); + AddOutput("RPCClient", + "(RPCClient) The RPC client object which is" + "initialized at most once."); AddComment(R"DOC( Send operator diff --git a/paddle/operators/sequence_erase_op.cu b/paddle/operators/sequence_erase_op.cu index f1e3b96acd0259de2b3ca1348834bd17e1e174a2..a5311f15f0c607c880a6f12c0bef10b2dd8c8a79 100644 --- a/paddle/operators/sequence_erase_op.cu +++ b/paddle/operators/sequence_erase_op.cu @@ -96,9 +96,8 @@ class SequenceEraseOpCUDAKernel : public framework::OpKernel { GetOutLod<<<(lod_len - 1) / PADDLE_CUDA_NUM_THREADS + 1, PADDLE_CUDA_NUM_THREADS, 0, stream>>>( num_erased_ptr, dev_in_lod_ptr, lod_len, dev_out_lod_ptr); - // Set LoD for output - thrust::host_vector out_lod0 = dev_out_lod; + std::vector out_lod0(dev_out_lod.begin(), dev_out_lod.end()); framework::LoD out_lod; out_lod.push_back(out_lod0); out->set_lod(out_lod); diff --git a/paddle/operators/sequence_reshape_op.cc b/paddle/operators/sequence_reshape_op.cc index 57cca13105537d88fe942b850cae10650d3096e2..d89a46a712c9c84a142e1e347219ed171556d761 100644 --- a/paddle/operators/sequence_reshape_op.cc +++ b/paddle/operators/sequence_reshape_op.cc @@ -30,8 +30,13 @@ class SequenceReshapeOp : public framework::OperatorWithKernel { auto x_numel = product(x_dims); PADDLE_ENFORCE_EQ(x_dims.size(), 2U, "Rank of Input(X) should be 2."); int new_dim = ctx->Attrs().Get("new_dim"); - ctx->SetOutputDim("Out", - {x_numel / new_dim, static_cast(new_dim)}); + if (ctx->IsRuntime()) { + ctx->SetOutputDim("Out", + {x_numel / new_dim, static_cast(new_dim)}); + } else { + // when compiling, the batch size is undetermined, just set to -1 + ctx->SetOutputDim("Out", {-1, static_cast(new_dim)}); + } } }; diff --git a/paddle/operators/sgd_op.cu b/paddle/operators/sgd_op.cu index 42f8f8b2f072f9d204dfadcd732926b5c98dc617..29f5aa3542c26c76a1b80da61ec6752019216131 100644 --- a/paddle/operators/sgd_op.cu +++ b/paddle/operators/sgd_op.cu @@ -89,7 +89,7 @@ class SGDOpCUDAKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(in_height, out_dims[0]); auto& in_value = grad->value(); - auto& in_rows = grad->rows(); + framework::Vector in_rows(grad->rows()); int64_t in_row_numel = in_value.numel() / in_rows.size(); PADDLE_ENFORCE_EQ(in_row_numel, param_out->numel() / in_height); @@ -102,7 +102,7 @@ class SGDOpCUDAKernel : public framework::OpKernel { dim3 grid(1, in_rows.size()); SparseSGDFunctorKernel< T, 256><<>>( - in_data, in_rows.data(), learning_rate->data(), out_data, + in_data, in_rows.cuda_data(), learning_rate->data(), out_data, in_row_numel); } else { diff --git a/paddle/operators/sum_op.h b/paddle/operators/sum_op.h index 48201b344de0d3bd2b121a12389876dad095f10d..3d8102c3ae20c8b714cd48b4fc78dc18a0cf89a7 100644 --- a/paddle/operators/sum_op.h +++ b/paddle/operators/sum_op.h @@ -68,7 +68,32 @@ class SumKernel : public framework::OpKernel { } } } else if (out_var->IsType()) { - PADDLE_ENFORCE(!in_place, "SelectedRows not support inplace sum now"); + std::unique_ptr in0; + if (in_place) { + // If is in_place, we store the input[0] to in0 + auto &in_sel0 = in_vars[0]->Get(); + auto &rows = in_sel0.rows(); +#ifdef PADDLE_WITH_CUDA + std::vector rows_in_cpu; + rows_in_cpu.reserve(rows.size()); + for (auto item : rows) { + rows_in_cpu.push_back(item); + } + in0.reset(new framework::SelectedRows(rows_in_cpu, in_sel0.height())); +#else + in0.reset(new framework::SelectedRows(rows, in_sel0.height())); +#endif + in0->mutable_value()->ShareDataWith(in_sel0.value()); + } + + auto get_selected_row = [&](size_t i) -> const SelectedRows & { + if (i == 0 && in0) { + return *in0.get(); + } else { + return in_vars[i]->Get(); + } + }; + auto *out = context.Output("Out"); out->mutable_rows()->clear(); auto *out_value = out->mutable_value(); @@ -76,24 +101,26 @@ class SumKernel : public framework::OpKernel { // Runtime InferShape size_t first_dim = 0; for (int i = 0; i < N; i++) { - first_dim += in_vars[i]->Get().rows().size(); + auto &sel_row = get_selected_row(i); + first_dim += sel_row.rows().size(); } - auto in_dim = in_vars[0]->Get().value().dims(); - auto in_dim_vec = framework::vectorize(in_dim); - in_dim_vec[0] = static_cast(first_dim); + auto in_dim = + framework::vectorize(get_selected_row(N - 1).value().dims()); + in_dim[0] = static_cast(first_dim); - out_value->Resize(framework::make_ddim(in_dim_vec)); + out_value->Resize(framework::make_ddim(in_dim)); out_value->mutable_data(context.GetPlace()); math::SelectedRowsAddTo functor; int64_t offset = 0; for (int i = 0; i < N; i++) { - PADDLE_ENFORCE_EQ(out->height(), - in_vars[i]->Get().height()); - functor(context.template device_context(), - in_vars[i]->Get(), offset, out); - offset += in_vars[i]->Get().value().numel(); + auto &sel_row = get_selected_row(i); + + PADDLE_ENFORCE_EQ(out->height(), sel_row.height()); + functor(context.template device_context(), sel_row, + offset, out); + offset += sel_row.value().numel(); } } else if (out_var->IsType()) { auto &out_array = *out_var->GetMutable(); diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 490397afdd4de0cc1aafde746d31b1d800eded3b..a880d9bdbc63aacc1f2cdbc0d7da001a59c7b372 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -124,44 +124,25 @@ PYBIND11_PLUGIN(core) { .def( "__init__", [](LoDTensor &instance, const std::vector> &lod) { -#ifndef PADDLE_WITH_CUDA - new (&instance) LoDTensor(lod); -#else - LoD new_lod; - new_lod.reserve(lod.size()); - std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); - new (&instance) LoDTensor(new_lod); -#endif + LoD new_lod; + new_lod.reserve(lod.size()); + std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); + new (&instance) LoDTensor(new_lod); }) .def("__init__", [](LoDTensor &instance) { new (&instance) LoDTensor(); }) .def("set_lod", [](LoDTensor &self, const std::vector> &lod) { -#ifndef PADDLE_WITH_CUDA - self.set_lod(lod); -#else LoD new_lod; new_lod.reserve(lod.size()); std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); self.set_lod(new_lod); -#endif }) .def("lod", [](LoDTensor &self) -> std::vector> { -#ifndef PADDLE_WITH_CUDA - return self.lod(); -#else - auto lod = self.lod(); - std::vector> new_lod; - new_lod.reserve(lod.size()); - std::transform(lod.begin(), lod.end(), std::back_inserter(new_lod), - [](Vector item) -> - std::vector { - std::vector v; - v.reserve(item.size()); - std::copy(item.begin(), item.end(), std::back_inserter(v)); - return v; - }); - return new_lod; -#endif + auto lod = self.lod(); + std::vector> new_lod; + new_lod.reserve(lod.size()); + std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); + return new_lod; }); py::class_(m, "SelectedRows") diff --git a/paddle/scripts/docker/README.md b/paddle/scripts/docker/README.md index f0620498cfa6775ce2949cc02fa9f6c9529dec2e..65c46745556bc5ea91fdd4e33060f2535422e8e8 100644 --- a/paddle/scripts/docker/README.md +++ b/paddle/scripts/docker/README.md @@ -56,7 +56,7 @@ Users can specify the following Docker build arguments with either "ON" or "OFF" | ------ | -------- | ----------- | | `WITH_GPU` | OFF | Generates NVIDIA CUDA GPU code and relies on CUDA libraries. | | `WITH_AVX` | OFF | Set to "ON" to enable AVX support. | -| `WITH_TESTING` | ON | Build unit tests binaries. | +| `WITH_TESTING` | OFF | Build unit tests binaries. | | `WITH_MKL` | ON | Build with [Intel® MKL](https://software.intel.com/en-us/mkl) and [Intel® MKL-DNN](https://github.com/01org/mkl-dnn) support. | | `WITH_GOLANG` | ON | Build fault-tolerant parameter server written in go. | | `WITH_SWIG_PY` | ON | Build with SWIG python API support. | diff --git a/paddle/testing/paddle_gtest_main.cc b/paddle/testing/paddle_gtest_main.cc index a7fb50ee4149a3c36077f83383f45f3106e7e0f1..a2f21e37e415ccaa0d9624656728d89739972905 100644 --- a/paddle/testing/paddle_gtest_main.cc +++ b/paddle/testing/paddle_gtest_main.cc @@ -22,7 +22,9 @@ limitations under the License. */ int main(int argc, char** argv) { std::vector new_argv; std::string gflags_env; - new_argv.push_back(argv[0]); + for (int i = 0; i < argc; ++i) { + new_argv.push_back(argv[i]); + } #ifdef PADDLE_WITH_CUDA new_argv.push_back( strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory")); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 4fdf4090212e31adcccf6b119c937e70d5cbf995..186b91c226accbe1c2d5465d6244b9438eec9979 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -140,8 +140,13 @@ def init_config_environment( g_submodel_stack=[], g_add_submodel_suffix=False, ): - for k, v in locals().iteritems(): - globals()[k] = copy.deepcopy(v) + # directly iterate through locals().iteritems() will change + # the size of locals() due to introducing k, v into scope + # which will break the process in some env + + local_vars = copy.deepcopy(locals()) + for k, v in local_vars.iteritems(): + globals()[k] = v # Because type is widely used as a variable name in this code. diff --git a/python/paddle/v2/fluid/__init__.py b/python/paddle/v2/fluid/__init__.py index 787416aed1acf81138df06110317614dfe77fb48..f52346c3b59264370f46844d0e6b1e2d489299c7 100644 --- a/python/paddle/v2/fluid/__init__.py +++ b/python/paddle/v2/fluid/__init__.py @@ -26,6 +26,7 @@ import initializer import layers import nets import optimizer +import learning_rate_decay import backward import regularizer from param_attr import ParamAttr @@ -35,27 +36,16 @@ from distribute_transpiler import DistributeTranspiler from distribute_transpiler_simple import SimpleDistributeTranspiler import clip from memory_optimization_transpiler import memory_optimize +import profiler Tensor = LoDTensor __all__ = framework.__all__ + executor.__all__ + [ - 'io', - 'initializer', - 'layers', - 'nets', - 'optimizer', - 'backward', - 'regularizer', - 'LoDTensor', - 'CPUPlace', - 'CUDAPlace', - 'Tensor', + 'io', 'initializer', 'layers', 'nets', 'optimizer', 'learning_rate_decay', + 'backward', 'regularizer', 'LoDTensor', 'CPUPlace', 'CUDAPlace', 'Tensor', 'ParamAttr' - 'DataFeeder', - 'clip', - 'SimpleDistributeTranspiler', - 'DistributeTranspiler', - 'memory_optimize', + 'DataFeeder', 'clip', 'SimpleDistributeTranspiler', 'DistributeTranspiler', + 'memory_optimize', 'profiler' ] @@ -86,11 +76,9 @@ def __bootstrap__(): os.environ['OMP_NUM_THREADS'] = str(num_threads) - read_env_flags = [ - 'use_pinned_memory', 'check_nan_inf', 'do_memory_benchmark' - ] + read_env_flags = ['use_pinned_memory', 'check_nan_inf', 'benchmark'] if core.is_compiled_with_cuda(): - read_env_flags += ['fraction_of_gpu_memory_to_use', 'op_sync'] + read_env_flags += ['fraction_of_gpu_memory_to_use'] core.init_gflags([sys.argv[0]] + ["--tryfromenv=" + ",".join(read_env_flags)]) core.init_glog(sys.argv[0]) diff --git a/python/paddle/v2/fluid/clip.py b/python/paddle/v2/fluid/clip.py index 3028029e60fde2f481b4348ab1b0a4980ebb2b60..fdbc8524abb7d6687983b026ca8e65e61c3dfd1a 100644 --- a/python/paddle/v2/fluid/clip.py +++ b/python/paddle/v2/fluid/clip.py @@ -30,6 +30,9 @@ __all__ = [ class BaseErrorClipAttr(object): + def __str__(self): + raise NotImplementedError() + def append_clip_op(self, block, grad_name): raise NotImplementedError() @@ -44,6 +47,9 @@ class ErrorClipByValue(BaseErrorClipAttr): self.max = max self.min = min + def __str__(self): + return "ByValue, min=%f, max=%f" % (self.min, self.max) + def append_clip_op(self, block, grad_name): clip_op_desc = block.desc.append_op() clip_op_desc.set_type("clip") @@ -71,6 +77,9 @@ def error_clip_callback(block, context): class BaseGradientClipAttr(object): + def __str__(self): + raise NotImplementedError() + def process_context(self, context, param, grad): raise NotImplementedError() @@ -79,6 +88,9 @@ class BaseGradientClipAttr(object): class NullGradientClipAttr(BaseGradientClipAttr): + def __str__(self): + return "Null" + def process_context(self, context, param, grad): pass @@ -96,6 +108,9 @@ class GradientClipByValue(BaseGradientClipAttr): self.max = max self.min = min + def __str__(self): + return "ByValue, min=%f, max=%f" % (self.min, self.max) + def process_context(self, context, param, grad): pass @@ -108,6 +123,9 @@ class GradientClipByNorm(BaseGradientClipAttr): def __init__(self, clip_norm): self.clip_norm = clip_norm + def __str__(self): + return "ByNorm, clip_norm=%f" % self.clip_norm + def process_context(self, context, param, grad): pass @@ -124,6 +142,10 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): self.clip_norm = clip_norm self.group_name = group_name + def __str__(self): + return "ByGlobalNorm, group_name=%s, clip_norm=%f" % (self.group_name, + self.clip_norm) + def process_context(self, context, param, grad): if self.group_name not in context: context[self.group_name] = [] @@ -160,6 +182,17 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): def set_gradient_clip(clip, param_list=None, program=None): + """ + To specify parameters that require gradient clip. + Args: + clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr, + which describes the type and detailed attributes of required gradient clip. + param_list(list, None by default): Parameters that require gradient clip. + It can be a list of parameter or a list of parameter's name. + When it's None, all parameters in the program will be included. + program(Program, None by default): The program where parameters are. + Will be the default main program when assigned with None. + """ if not isinstance(clip, BaseGradientClipAttr): raise TypeError( "'clip' should be an instance of BaseGradientClipAttr's derived class" @@ -199,3 +232,5 @@ def append_gradient_clip_ops(param_grad): ClipByValue = GradientClipByValue +ClipByNorm = GradientClipByNorm +ClipByGlobalNorm = GradientClipByGlobalNorm diff --git a/python/paddle/v2/fluid/distribute_transpiler.py b/python/paddle/v2/fluid/distribute_transpiler.py index d366a7086df0794eb68e0ac1f7b95ff695577cc9..121b407cae41fa477843b7252ebacc9053d5f7aa 100644 --- a/python/paddle/v2/fluid/distribute_transpiler.py +++ b/python/paddle/v2/fluid/distribute_transpiler.py @@ -153,11 +153,18 @@ class DistributeTranspiler: self.param_grad_ep_mapping[ep]["params"].append(param) self.param_grad_ep_mapping[ep]["grads"].append(grad) + rpc_client_var = program.global_block().create_var( + name="RPC_CLIENT_VAR", + psersistable=True, + dtype='float32', # dtype and shape is not used in fact + shape=[0]) + # create send_op send_op = program.global_block().append_op( type="send", inputs={"X": send_inputs}, - outputs={"Out": send_outputs}, + outputs={"Out": send_outputs, + "RPCClient": rpc_client_var}, attrs={"endpoints": pserver_endpoints, "epmap": eplist}) # step4 diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py index 3d85d4550f8e53ed52aa933a17fe85171b81cc6b..ae98e299a4838db8b7089e09683c33c940cfde3d 100644 --- a/python/paddle/v2/fluid/framework.py +++ b/python/paddle/v2/fluid/framework.py @@ -14,6 +14,7 @@ import collections import contextlib +import re import numpy as np @@ -239,20 +240,30 @@ class Variable(object): def __str__(self): return self.to_string(True) - def to_string(self, throw_on_error): + def to_string(self, throw_on_error, with_details=False): """ Get debug string. Args: throw_on_error(bool): True if raise an exception when self is not intialized. + with_details(bool): more details about variables and parameters + (e.g. trainable, optimize_attr, ...) will be printed when with_details is True Returns(str): The debug string. """ + assert isinstance(throw_on_error, bool) and isinstance(with_details, + bool) protostr = self.desc.serialize_to_string() proto = framework_pb2.VarDesc.FromString(str(protostr)) - return _debug_string_(proto, throw_on_error) + res_str = _debug_string_(proto, throw_on_error) + if with_details: + additional_attr = ("error_clip", "stop_gradient") + for attr_name in additional_attr: + res_str += "%s: %s\n" % (attr_name, + str(getattr(self, attr_name))) + return res_str __repr__ = __str__ @@ -629,10 +640,36 @@ class Block(object): def __str__(self): return self.to_string(True) - def to_string(self, throw_on_error): - protostr = self.desc.serialize_to_string() - proto = framework_pb2.BlockDesc.FromString(str(protostr)) - return _debug_string_(proto, throw_on_error) + def to_string(self, throw_on_error, with_details=False): + """ + To debug string. + Args: + throw_on_error(bool): raise exception when self is not initialized + when throw_on_error is True + with_details(bool): more details about variables and parameters + (e.g. trainable, optimize_attr, ...) will be printed when with_details is True + + Returns(str): The debug string. + + """ + assert isinstance(throw_on_error, bool) and isinstance(with_details, + bool) + if with_details: + re_add_indent = re.compile(r"\n(.)") + res_str = "blocks {\n idx: %d\n parent_idx: %d" % ( + self.idx, self.parent_idx) + for var in self.vars.itervalues(): + res_str += "\n vars {\n %s }" % re_add_indent.sub( + r"\n \1", var.to_string(throw_on_error, with_details)) + for op in self.ops: + res_str += "\n ops {\n %s }" % re_add_indent.sub( + r"\n \1", op.to_string(throw_on_error)) + res_str += "\n}" + else: + protostr = self.desc.serialize_to_string() + proto = framework_pb2.BlockDesc.FromString(str(protostr)) + res_str = _debug_string_(proto, throw_on_error) + return res_str __repr__ = __str__ @@ -796,10 +833,29 @@ class Program(object): def __str__(self): return self.to_string(True) - def to_string(self, throw_on_error): - protostr = self.desc.serialize_to_string() - proto = framework_pb2.ProgramDesc.FromString(str(protostr)) - return _debug_string_(proto, throw_on_error) + def to_string(self, throw_on_error, with_details=False): + """ + To debug string. + Args: + throw_on_error(bool): raise exception when self is not initialized + when throw_on_error is True + with_details(bool): more details about variables and parameters + (e.g. trainable, optimize_attr, ...) will be printed when with_details is True + + Returns(str): The debug string. + + """ + assert isinstance(throw_on_error, bool) and isinstance(with_details, + bool) + if with_details: + res_str = "" + for block in self.blocks: + res_str += block.to_string(throw_on_error, with_details) + else: + protostr = self.desc.serialize_to_string() + proto = framework_pb2.ProgramDesc.FromString(str(protostr)) + res_str = _debug_string_(proto, throw_on_error) + return res_str def get_desc(self): return self.desc @@ -950,6 +1006,36 @@ class Parameter(Variable): self.gradient_clip_attr = kwargs.get('gradient_clip_attr', None) + def __str__(self): + return self.to_string(True) + + def to_string(self, throw_on_error, with_details=False): + """ + To debug string. + Args: + throw_on_error(bool): raise exception when self is not initialized + when throw_on_error is True + with_details(bool): more details about variables and parameters + (e.g. trainable, optimize_attr, ...) will be printed when with_details is True + + Returns(str): The debug string. + + """ + assert isinstance(throw_on_error, bool) and isinstance(with_details, + bool) + if with_details: + res_str = Variable.to_string(self, throw_on_error, True) + additional_attr = ("trainable", "optimize_attr", "regularizer", + "gradient_clip_attr") + for attr_name in additional_attr: + res_str += "%s: %s\n" % (attr_name, + str(getattr(self, attr_name))) + else: + res_str = Variable.to_string(self, throw_on_error, False) + return res_str + + __repr__ = __str__ + # program is a global instance. _main_program_ = Program() diff --git a/python/paddle/v2/fluid/layer_helper.py b/python/paddle/v2/fluid/layer_helper.py index 7d9ae53d94b6c82890150346f138e48a0dfbf15c..2119ca12c8dea6463934aa68cb1b46ec687e3f72 100644 --- a/python/paddle/v2/fluid/layer_helper.py +++ b/python/paddle/v2/fluid/layer_helper.py @@ -18,7 +18,7 @@ import itertools from framework import Variable, Parameter, default_main_program, default_startup_program, \ unique_name, dtype_is_floating from paddle.v2.fluid.initializer import Constant, Xavier -from param_attr import ParamAttr +from param_attr import ParamAttr, WeightNormParamAttr class LayerHelper(object): @@ -104,6 +104,177 @@ class LayerHelper(object): (dtype, each.dtype)) return dtype + def _create_weight_normalize(self, attr, shape, dtype): + from .layers import elementwise_mul, elementwise_div, reshape + + # Remove these ops when LayerHelper and layers support indicating + # program and block. + def __norm_op(x, + out=None, + p=2, + dim=None, + keep_dim=False, + block=self.startup_program.global_block()): + if out is None: + out = block.create_var( + name=unique_name(".".join([self.name, 'weight_norm_norm'])), + dtype=dtype, + persistable=False) + abs_out = block.create_var( + name=unique_name(".".join([self.name, 'weight_norm_abs'])), + dtype=dtype, + persistable=False) + block.append_op( + type='abs', inputs={'X': x}, outputs={'Out': abs_out}) + pow_out = block.create_var( + name=unique_name(".".join([self.name, 'weight_norm_pow'])), + dtype=dtype, + persistable=False) + block.append_op( + type='pow', + inputs={'X': abs_out}, + outputs={'Out': pow_out}, + attrs={'factor': float(p)}) + sum_out = block.create_var( + name=unique_name(".".join([self.name, 'weight_norm_sum'])), + dtype=dtype, + persistable=False) + block.append_op( + type='reduce_sum', + inputs={'X': pow_out}, + outputs={'Out': sum_out}, + attrs={ + 'dim': dim, + 'keep_dim': keep_dim, + 'reduce_all': True if dim is None else False + }) + block.append_op( + type='pow', + inputs={'X': sum_out}, + outputs={'Out': out}, + attrs={'factor': 1. / p}) + return out + + def __reshape_op(x, + shape, + out=None, + block=self.startup_program.global_block()): + if out is None: + out = block.create_var( + name=unique_name(".".join( + [self.name, 'weight_norm_reshape'])), + dtype=dtype, + persistable=False) + block.append_op( + type='reshape', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'shape': shape}) + return out + + def __transpose_op(x, + axis, + out=None, + block=self.startup_program.global_block()): + if out is None: + out = block.create_var( + name=unique_name(".".join( + [self.name, 'weight_norm_transpose'])), + dtype=dtype, + persistable=False) + block.append_op( + type='transpose', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'axis': axis}) + return out + + def __norm_except_dim(x, + out=None, + dim=None, + block=self.startup_program.global_block()): + """Computes the norm over all dimensions except dim""" + if out is None: + out = block.create_var( + name=unique_name(".".join([self.name, 'weight_norm_norm'])), + dtype=dtype, + persistable=False) + if dim is None: + __norm_op(x, out, dim=dim, block=block) + elif dim == 0: + out_shape = [x.shape[0]] + [1] * (len(x.shape) - 1) + reshape = __reshape_op(x, shape=[x.shape[0], -1], block=block) + norm = __norm_op(reshape, dim=1, block=block) + __reshape_op(norm, out=out, shape=out_shape, block=block) + elif dim == len(x.shape) - 1: + out_shape = [1] * (len(x.shape) - 1) + [x.shape[-1]] + reshape = __reshape_op(x, shape=[-1, x.shape[-1]], block=block) + norm = __norm_op(reshape, dim=0, block=block) + __reshape_op(norm, out=out, shape=out_shape, block=block) + else: + perm = range(len(x.shape)) + perm[0], perm[dim] = dim, 0 + transpose = __transpose_op(x, perm, block=block) + norm = __norm_op(transpose, dim=0, block=block) + __transpose_op(norm, perm, out=out, block=block) + return out + + def __weight_normalize(g, v, dim): + """Calculations for weight normalization""" + norm = __norm_except_dim( + v, dim=dim, block=self.main_program.current_block()) + scale = elementwise_div( + x=g, y=norm) # The shapes of g and norm are the same. + # Currently, elementwise_mul only support broadcast when the shape + # of y is a subset of the shape of x. Thus, we reshape y to squeeze + # to achive the subset. + w = elementwise_mul( + x=v, + y=scale if dim is None else reshape( + x=scale, shape=[v.shape[dim]]), + axis=-1 if dim is None else dim) + # To serialize the original parameter for inference, maybe a + # parameter rather than a variable should be returned. + return w + + g_param_attr = copy.deepcopy(attr) + g_param_attr.name = attr.name + '_g' + g_param_shape = [1] * len(shape) + if attr.dim is not None: + g_param_shape[attr.dim] = shape[attr.dim] + v_param_attr = copy.deepcopy(attr) + v_param_attr.name = attr.name + '_v' + v_param_shape = shape + + # Add to startup_program to initialize g and v. + # Try to reconstruct the initializer of w by initializing g and v. + # Set the initializers of g and v as below, then the distribution + # of w is the same as initializing w with the given initializer. + # For Data-Dependent Initialization, please compute the init-values + # of g and v in external and then feed the values to g and v by + # executing an extra program. + g_param = self.startup_program.global_block().create_parameter( + dtype=dtype, + shape=g_param_shape, + **g_param_attr.to_kwargs(with_initializer=False)) + v_param = self.startup_program.global_block().create_parameter( + dtype=dtype, + shape=v_param_shape, + **v_param_attr.to_kwargs(with_initializer=True)) + __norm_except_dim( + x=v_param, + out=g_param, + dim=attr.dim, + block=self.startup_program.global_block()) + + # Add weight normalization to main_program + g_param = self.main_program.global_block().create_parameter( + dtype=dtype, shape=g_param_shape, **g_param_attr.to_kwargs()) + v_param = self.main_program.global_block().create_parameter( + dtype=dtype, shape=v_param_shape, **v_param_attr.to_kwargs()) + w_param = __weight_normalize(g_param, v_param, dim=attr.dim) + return w_param + def create_parameter(self, attr, shape, @@ -114,16 +285,23 @@ class LayerHelper(object): attr = copy.deepcopy(attr) assert isinstance(attr, ParamAttr) suffix = 'b' if is_bias else 'w' + if attr.name is None: + attr.name = unique_name(".".join([self.name, suffix])) - if default_initializer is None: + if default_initializer is None and attr.initializer is None: if is_bias: attr.set_default_bias_initializer() else: attr.set_default_param_initializer() else: attr.set_default_initializer(default_initializer) - if attr.name is None: - attr.name = unique_name(".".join([self.name, suffix])) + + # If weight normalization is set, insert extra parameters and ops. + # Refer to https://arxiv.org/pdf/1602.07868.pdf + if isinstance(attr, WeightNormParamAttr): + param = self._create_weight_normalize(attr, shape, dtype) + WeightNormParamAttr.params_with_weight_norm.append(param) + return param self.startup_program.global_block().create_parameter( dtype=dtype, shape=shape, **attr.to_kwargs(with_initializer=True)) diff --git a/python/paddle/v2/fluid/layers/math_op_patch.py b/python/paddle/v2/fluid/layers/math_op_patch.py index f359e70126f7601b75261e795b5a37bdc241112e..79a130a3eb148e6c5a8fa3cdf174780b354c23c9 100644 --- a/python/paddle/v2/fluid/layers/math_op_patch.py +++ b/python/paddle/v2/fluid/layers/math_op_patch.py @@ -145,7 +145,9 @@ def monkey_patch_variable(): # a*b == b*a. Do not need to reverse explicitly ("__rmul__", "elementwise_mul", False), ("__div__", "elementwise_div", False), - ("__rdiv__", "elementwise_div", True)): + ("__rdiv__", "elementwise_div", True), + ("__pow__", "elementwise_pow", False), + ("__rpow__", "elementwise_pow", True)): setattr(Variable, method_name, _elemwise_method_creator_(method_name, op_type, reverse)) diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py index d11dccfd22124d58d8634c01a00527c373b92f00..c38e21087de1bf7076ce5aaf23d4d4faaebb50a7 100644 --- a/python/paddle/v2/fluid/layers/nn.py +++ b/python/paddle/v2/fluid/layers/nn.py @@ -847,7 +847,35 @@ def cos_sim(X, Y, **kwargs): return out -def dropout(x, dropout_prob, is_test=False, seed=0, **kwargs): +def dropout(x, dropout_prob, is_test=False, seed=None, **kwargs): + """ + Computes dropout. + + Drop or keep each element of `x` independently. Dropout is a regularization + technique for reducing overfitting by preventing neuron co-adaption during + training. The dropout operator randomly set (according to the given dropout + probability) the outputs of some units to zero, while others are remain + unchanged. + + Args: + x(variable): The input tensor. + dropout_prob(float): Probability of setting units to zero. + is_test(bool): A flag indicating whether it is in test phrase or not. + seed(int): A Python integer used to create random seeds. If this + parameter is set to None, a random seed is used. + NOTE: If an integer seed is given, always the same output + units will be dropped. DO NOT use a fixed seed in training. + + Returns: + Variable: A tensor variable. + + Examples: + .. code-block:: python + + x = fluid.layers.data(name="data", shape=[32, 32], dtype="float32") + droped = fluid.layers.dropout(input=x, dropout_rate=0.5) + """ + helper = LayerHelper('dropout', **kwargs) out = helper.create_tmp_variable(dtype=x.dtype) mask = helper.create_tmp_variable(dtype=x.dtype, stop_gradient=True) @@ -856,9 +884,12 @@ def dropout(x, dropout_prob, is_test=False, seed=0, **kwargs): inputs={'X': [x]}, outputs={'Out': [out], 'Mask': [mask]}, - attrs={'dropout_prob': dropout_prob, - 'is_test': is_test, - 'seed': seed}) + attrs={ + 'dropout_prob': dropout_prob, + 'is_test': is_test, + 'fix_seed': seed is not None, + 'seed': seed if seed is not None else 0 + }) return out diff --git a/python/paddle/v2/fluid/layers/ops.py b/python/paddle/v2/fluid/layers/ops.py index 022a94cad440f13383a927233195bb008a688843..ee3172c7b8dfd65c693e5aee9b55179e654ce7be 100644 --- a/python/paddle/v2/fluid/layers/ops.py +++ b/python/paddle/v2/fluid/layers/ops.py @@ -56,6 +56,7 @@ __all__ = [ 'elementwise_mul', 'elementwise_max', 'elementwise_min', + 'elementwise_pow', 'clip', 'clip_by_norm', 'sequence_softmax', diff --git a/python/paddle/v2/fluid/layers/tensor.py b/python/paddle/v2/fluid/layers/tensor.py index 6e7d09459c07c77a8579300a1c67ae36dc3d2ba2..c435c5206d1ef1ef57683a1a47bf089be6526f38 100644 --- a/python/paddle/v2/fluid/layers/tensor.py +++ b/python/paddle/v2/fluid/layers/tensor.py @@ -16,12 +16,14 @@ from ..layer_helper import LayerHelper from ..param_attr import ParamAttr from ..framework import convert_np_dtype_to_dtype_ from ..framework import Variable +from ..initializer import Constant from ..core import DataType import numpy __all__ = [ 'create_tensor', 'create_parameter', + 'create_global_var', 'cast', 'concat', 'sums', @@ -58,13 +60,22 @@ def create_parameter(shape, Returns: Parameter: the created parameter """ - helper = LayerHelper("create_parameter") + helper = LayerHelper("create_parameter", **locals()) if attr is None: attr = ParamAttr() return helper.create_parameter(attr, shape, dtype, is_bias, default_initializer) +def create_global_var(shape, value, dtype, persistable=False, name=None): + helper = LayerHelper("global_var", **locals()) + var = helper.create_global_variable( + dtype=dtype, shape=shape, persistable=persistable, name=name) + helper.set_variable_initializer( + var, initializer=Constant(value=float(value))) + return var + + def cast(x, dtype): """ This function takes in the input with input_dtype diff --git a/python/paddle/v2/fluid/learning_rate_decay.py b/python/paddle/v2/fluid/learning_rate_decay.py new file mode 100644 index 0000000000000000000000000000000000000000..96b3e9a0d73cede5d6e36308a53ab8927a95a6da --- /dev/null +++ b/python/paddle/v2/fluid/learning_rate_decay.py @@ -0,0 +1,125 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import layers +from framework import Variable + +__all__ = ['exponential_decay', 'natural_exp_decay', 'inverse_time_decay'] +""" +When training a model, it's often useful to decay the +learning rate during training process, this is called +learning_rate_decay. There are many strategies to do +this, this module will provide some classical method. +User can also implement their own learning_rate_decay +strategy according to this module. +""" + + +def exponential_decay(learning_rate, + global_step, + decay_steps, + decay_rate, + staircase=False): + """Applies exponential decay to the learning rate. + + ```python + decayed_learning_rate = learning_rate * + decay_rate ^ (global_step / decay_steps) + ``` + Args: + learning_rate: A scalar float32 value or a Variable. This + will be the initial learning rate during training + global_step: A Variable that record the training step. + decay_steps: A Python `int32` number. + decay_rate: A Python `float` number. + staircase: Boolean. If set true, decay the learning rate every decay_steps. + + Returns: + The decayed learning rate + """ + if not isinstance(global_step, Variable): + raise ValueError("global_step is required for exponential_decay.") + + # update learning_rate + div_res = global_step / decay_steps + if staircase: + div_res = layers.floor(x=div_res) + return learning_rate * (decay_rate**div_res) + + +def natural_exp_decay(learning_rate, + global_step, + decay_steps, + decay_rate, + staircase=False): + """Applies natural exponential decay to the initial learning rate. + + ```python + if not staircase: + decayed_learning_rate = learning_rate * exp(- decay_rate * (global_step / decay_steps)) + else: + decayed_learning_rate = learning_rate * exp(- decay_rate * (global_step / decay_steps)) + ``` + Args: + learning_rate: A scalar float32 value or a Variable. This + will be the initial learning rate during training + global_step: A Variable that record the training step. + decay_steps: A Python `int32` number. + decay_rate: A Python `float` number. + staircase: Boolean. If set true, decay the learning rate every decay_steps. + + Returns: + The decayed learning rate + """ + if not isinstance(global_step, Variable): + raise ValueError("global_step is required for natural_exp_decay.") + + div_res = global_step / decay_steps + if staircase: + div_res = layers.floor(x=div_res) + return learning_rate * layers.exp(x=(-1 * decay_rate * div_res)) + + +def inverse_time_decay(learning_rate, + global_step, + decay_steps, + decay_rate, + staircase=False): + """Applies inverse time decay to the initial learning rate. + + ```python + if staircase: + decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step)) + else + decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step) + ``` + Args: + learning_rate: A scalar float32 value or a Variable. This + will be the initial learning rate during training + global_step: A Variable that record the training step. + decay_steps: A Python `int32` number. + decay_rate: A Python `float` number. + staircase: Boolean. If set true, decay the learning rate every decay_steps. + + Returns: + The decayed learning rate + """ + if not isinstance(global_step, Variable): + raise ValueError("global_step is required for inverse_time_decay.") + + div_res = global_step / decay_steps + if staircase: + div_res = layers.floor(x=div_res) + + return learning_rate / (1 + decay_rate * div_res) diff --git a/python/paddle/v2/fluid/optimizer.py b/python/paddle/v2/fluid/optimizer.py index 0c3533b892176edd5dfd111fdd771cc17d468168..7844a4e2df1ce3989e48082f6472292560fbf1ee 100644 --- a/python/paddle/v2/fluid/optimizer.py +++ b/python/paddle/v2/fluid/optimizer.py @@ -15,6 +15,7 @@ from collections import defaultdict import framework +import layers from backward import append_backward from framework import unique_name, program_guard from initializer import Constant @@ -33,9 +34,11 @@ class Optimizer(object): but need to use one of it's implementation. """ - def __init__(self, global_step=None, regularization=None): + def __init__(self, learning_rate, global_step=None, regularization=None): + assert learning_rate is not None self._global_step = global_step self.regularization = regularization + self._global_learning_rate = learning_rate # Dictionary of accumulators. Some optimizer subclasses need to # allocate and manage extra variables associated with the parameters # to train. These variables are called accumulators. @@ -43,6 +46,28 @@ class Optimizer(object): self._accumulators = defaultdict(lambda: dict()) self.helper = None + def _create_global_learning_rate(self): + if isinstance(self._global_learning_rate, float): + self._global_learning_rate = layers.create_global_var( + name=unique_name("learning_rate"), + shape=[1], + value=float(self._global_learning_rate), + dtype='float32', + persistable=True) + + if not isinstance(self._global_learning_rate, framework.Variable): + raise ValueError("learning rate should be a Variable, " + "actual type is %s", + type(self._global_learning_rate)) + + @property + def global_learning_rate(self): + """ + get global decayed learning rate + :return: + """ + return self._global_learning_rate + def _append_optimize_op(self, block, param_and_grad): """ append optimize operator to block and return all the added optimize_op """ @@ -52,17 +77,7 @@ class Optimizer(object): # create learning rate variable for every parameter param = param_and_grad[0] param_lr = param.optimize_attr['learning_rate'] - param_lr_shape = [1] - param_lr_var = self.helper.create_global_variable( - name=unique_name("learning_rate"), - dtype='float32', - shape=param_lr_shape, - lod_level=1, - persistable=True) - param_lr = param_lr * self._learning_rate - self.helper.set_variable_initializer( - var=param_lr_var, initializer=Constant(param_lr)) - return param_lr_var + return self._global_learning_rate * param_lr def _create_accumulators(self, block, parameters): """Create all accumulators needed by the parameters @@ -163,7 +178,7 @@ class Optimizer(object): optimization. This will include parameter update ops, global step update ops and any other custom ops required by subclasses to manage their internal state. - :param startup_program: + :param startup_program: """ # This is a default implementation of create_optimization_pass that # can be shared by most optimizers. This implementation assumes that @@ -178,6 +193,7 @@ class Optimizer(object): self.helper = LayerHelper(self.__class__.__name__) self._create_accumulators(loss.block, [p[0] for p in parameters_and_grads]) + self._create_global_learning_rate() optimize_ops = [] for param_and_grad in parameters_and_grads: @@ -231,9 +247,9 @@ class SGDOptimizer(Optimizer): def __init__(self, learning_rate, **kwargs): assert learning_rate is not None - super(SGDOptimizer, self).__init__(**kwargs) + super(SGDOptimizer, self).__init__( + learning_rate=learning_rate, **kwargs) self.type = "sgd" - self._learning_rate = learning_rate def _append_optimize_op(self, block, param_and_grad): assert isinstance(block, framework.Block) @@ -259,9 +275,9 @@ class MomentumOptimizer(Optimizer): def __init__(self, learning_rate, momentum, use_nesterov=False, **kwargs): assert learning_rate is not None assert momentum is not None - super(MomentumOptimizer, self).__init__(**kwargs) + super(MomentumOptimizer, self).__init__( + learning_rate=learning_rate, **kwargs) self.type = "momentum" - self._learning_rate = learning_rate self._momentum = momentum self._use_nesterov = bool(use_nesterov) @@ -303,9 +319,9 @@ class AdagradOptimizer(Optimizer): def __init__(self, learning_rate, epsilon=1.0e-6, **kwargs): assert learning_rate is not None assert epsilon is not None - super(AdagradOptimizer, self).__init__(**kwargs) + super(AdagradOptimizer, self).__init__( + learning_rate=learning_rate, **kwargs) self.type = "adagrad" - self._learning_rate = learning_rate self._epsilon = epsilon def _create_accumulators(self, block, parameters): @@ -352,9 +368,9 @@ class AdamOptimizer(Optimizer): assert beta1 is not None assert beta2 is not None assert epsilon is not None - super(AdamOptimizer, self).__init__(**kwargs) + super(AdamOptimizer, self).__init__( + learning_rate=learning_rate, **kwargs) self.type = "adam" - self._learning_rate = learning_rate self._beta1 = beta1 self._beta2 = beta2 self._epsilon = epsilon @@ -457,9 +473,9 @@ class AdamaxOptimizer(Optimizer): assert beta1 is not None assert beta2 is not None assert epsilon is not None - super(AdamaxOptimizer, self).__init__(**kwargs) + super(AdamaxOptimizer, self).__init__( + learning_rate=learning_rate, **kwargs) self.type = "adamax" - self._learning_rate = learning_rate self._beta1 = beta1 self._beta2 = beta2 self._epsilon = epsilon @@ -535,9 +551,9 @@ class DecayedAdagradOptimizer(Optimizer): assert decay is not None assert epsilon is not None - super(DecayedAdagradOptimizer, self).__init__(**kwargs) + super(DecayedAdagradOptimizer, self).__init__( + learning_rate=learning_rate, **kwargs) self.type = "decayed_adagrad" - self._learning_rate = learning_rate self._decay = decay self._epsilon = epsilon diff --git a/python/paddle/v2/fluid/param_attr.py b/python/paddle/v2/fluid/param_attr.py index dcca8b6c547d10864ff4cd0af1c217d89e3b522f..fc566b8a2480ce9256d610b4731405cd6d89b7e4 100644 --- a/python/paddle/v2/fluid/param_attr.py +++ b/python/paddle/v2/fluid/param_attr.py @@ -15,7 +15,10 @@ from initializer import Initializer, Xavier, Constant from regularizer import WeightDecayRegularizer -__all__ = ['ParamAttr'] +__all__ = [ + 'ParamAttr', + 'WeightNormParamAttr', +] class ParamAttr(object): @@ -82,3 +85,20 @@ class ParamAttr(object): if with_initializer: kwargs['initializer'] = self.initializer return kwargs + + +class WeightNormParamAttr(ParamAttr): + """ + Used for weight normalization. Any field in ParamAttr can also be set here. + Besides, an extra field dim can be set to indicate the dimension except + which to normalize. + """ + # List to record the parameters reparameterized by weight normalization. + # If these parameters are treated as Variable rather than Parameter, + # it can be used to discriminate these parameters and help to serialize + # these paramters for inference. + params_with_weight_norm = [] + + def __init__(self, dim=None, **kwargs): + super(WeightNormParamAttr, self).__init__(**kwargs) + self.dim = dim diff --git a/python/paddle/v2/fluid/profiler.py b/python/paddle/v2/fluid/profiler.py index 51c1c8aa705513825b46fb936c6c99090c50fb7d..d4a2cd7eeabecb60699b5be94d89cf7a916749e7 100644 --- a/python/paddle/v2/fluid/profiler.py +++ b/python/paddle/v2/fluid/profiler.py @@ -12,11 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.v2.fluid.core as core +import core from contextlib import contextmanager import os -__all__ = ['CudaProfiler'] +__all__ = ['cuda_profiler', 'reset_profiler', 'profiler'] NVPROF_CONFIG = [ "gpustarttimestamp", diff --git a/python/paddle/v2/fluid/regularizer.py b/python/paddle/v2/fluid/regularizer.py index c2f28eecfda71e305d96c5a6b62c4f5f0fbf3fa6..0273da647afb6e95a136b5ecd0975347d9a378ff 100644 --- a/python/paddle/v2/fluid/regularizer.py +++ b/python/paddle/v2/fluid/regularizer.py @@ -87,6 +87,11 @@ class WeightDecayRegularizer(object): """ raise NotImplementedError() + def __str__(self): + """Debug string + """ + raise NotImplementedError() + class L2DecayRegularizer(WeightDecayRegularizer): """Implements the L2 Weight Decay Regularization @@ -123,6 +128,9 @@ class L2DecayRegularizer(WeightDecayRegularizer): return decay + def __str__(self): + return "L2Decay, regularization_coeff=%f" % self._regularization_coeff + class L1DecayRegularizer(WeightDecayRegularizer): """Implements the L1 Weight Decay Regularization @@ -163,6 +171,9 @@ class L1DecayRegularizer(WeightDecayRegularizer): return decay + def __str__(self): + return "L1Decay, regularization_coeff=%f" % self._regularization_coeff + # We short the class name, since users will use the regulaizer with the package # name. The sample code: diff --git a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py index 1a342bf1fbbc0e5f4e3c7d440424b66c4b9f732f..f85768de99adb8b5005b23278ad807a24c5bff65 100644 --- a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py @@ -175,7 +175,7 @@ def main(): paddle.reader.shuffle( paddle.dataset.conll05.test(), buf_size=8192), batch_size=BATCH_SIZE) - #place = fluid.CPUPlace() + # place = fluid.CPUPlace() place = fluid.CUDAPlace(0) feeder = fluid.DataFeeder( feed_list=[ diff --git a/python/paddle/v2/fluid/tests/book/test_recognize_digits.py b/python/paddle/v2/fluid/tests/book/test_recognize_digits.py index ac7ef4046f9ff55c2cbfc28b50784b9bffb80d53..b4b6020f58e7538dfe0f98c17d61f3614c3c6fc4 100644 --- a/python/paddle/v2/fluid/tests/book/test_recognize_digits.py +++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits.py @@ -45,8 +45,9 @@ BATCH_SIZE = 64 def loss_net(hidden, label): prediction = fluid.layers.fc(input=hidden, size=10, act='softmax') loss = fluid.layers.cross_entropy(input=prediction, label=label) - return fluid.layers.mean(x=loss), fluid.layers.accuracy( - input=prediction, label=label) + avg_loss = fluid.layers.mean(x=loss) + acc = fluid.layers.accuracy(input=prediction, label=label) + return prediction, avg_loss, acc def mlp(img, label): @@ -73,8 +74,7 @@ def conv_net(img, label): return loss_net(conv_pool_2, label) -def main(): - args = parse_arg() +def train(args, save_dirname=None): print("recognize digits with args: {0}".format(" ".join(sys.argv[1:]))) img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32') @@ -91,7 +91,8 @@ def main(): with pd.do(): img_ = pd.read_input(img) label_ = pd.read_input(label) - for o in net_conf(img_, label_): + prediction, avg_loss, acc = net_conf(img_, label_) + for o in [avg_loss, acc]: pd.write_output(o) avg_loss, acc = pd() @@ -99,7 +100,7 @@ def main(): avg_loss = fluid.layers.mean(x=avg_loss) acc = fluid.layers.mean(x=acc) else: - avg_loss, acc = net_conf(img, label) + prediction, avg_loss, acc = net_conf(img, label) test_program = fluid.default_main_program().clone() @@ -137,7 +138,10 @@ def main(): acc_val = numpy.array(acc_set).mean() avg_loss_val = numpy.array(avg_loss_set).mean() if float(acc_val) > 0.85: # test acc > 85% - exit(0) + if save_dirname is not None: + fluid.io.save_inference_model(save_dirname, ["img"], + [prediction], exe) + return else: print( 'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'. @@ -145,5 +149,36 @@ def main(): float(avg_loss_val), float(acc_val))) +def infer(args, save_dirname=None): + if save_dirname is None: + return + + place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace() + exe = fluid.Executor(place) + + # Use fluid.io.load_inference_model to obtain the inference program desc, + # the feed_target_names (the names of variables that will be feeded + # data using feed operators), and the fetch_targets (variables that + # we want to obtain data from using fetch operators). + [inference_program, feed_target_names, + fetch_targets] = fluid.io.load_inference_model(save_dirname, exe) + + # The input's dimension of conv should be 4-D or 5-D. + tensor_img = numpy.random.rand(1, 1, 28, 28).astype("float32") + + # Construct feed as a dictionary of {feed_target_name: feed_target_data} + # and results will contain a list of data corresponding to fetch_targets. + results = exe.run(inference_program, + feed={feed_target_names[0]: tensor_img}, + fetch_list=fetch_targets) + print("infer results: ", results[0]) + + if __name__ == '__main__': - main() + args = parse_arg() + if not args.use_cuda and not args.parallel: + save_dirname = "recognize_digits_" + args.nn_type + ".inference.model" + else: + save_dirname = None + train(args, save_dirname) + infer(args, save_dirname) diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment.py similarity index 52% rename from python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py rename to python/paddle/v2/fluid/tests/book/test_understand_sentiment.py index 529223eba8af6d968b490068f34559880312515d..2ba9077a26202b1c16cc480823115f7ad55c2c67 100644 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_dynamic_lstm.py +++ b/python/paddle/v2/fluid/tests/book/test_understand_sentiment.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,9 +12,36 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np -import paddle.v2 as paddle +import unittest import paddle.v2.fluid as fluid +import paddle.v2 as paddle +import contextlib + + +def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32, + hid_dim=32): + emb = fluid.layers.embedding(input=data, size=[input_dim, emb_dim]) + conv_3 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=3, + act="tanh", + pool_type="sqrt") + conv_4 = fluid.nets.sequence_conv_pool( + input=emb, + num_filters=hid_dim, + filter_size=4, + act="tanh", + pool_type="sqrt") + prediction = fluid.layers.fc(input=[conv_3, conv_4], + size=class_dim, + act="softmax") + cost = fluid.layers.cross_entropy(input=prediction, label=label) + avg_cost = fluid.layers.mean(x=cost) + adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002) + adam_optimizer.minimize(avg_cost) + accuracy = fluid.layers.accuracy(input=prediction, label=label) + return avg_cost, accuracy def stacked_lstm_net(data, @@ -51,63 +78,77 @@ def stacked_lstm_net(data, avg_cost = fluid.layers.mean(x=cost) adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002) adam_optimizer.minimize(avg_cost) - accuracy = fluid.evaluator.Accuracy(input=prediction, label=label) - return avg_cost, accuracy, accuracy.metrics[0] - - -def to_lodtensor(data, place): - seq_lens = [len(seq) for seq in data] - cur_len = 0 - lod = [cur_len] - for l in seq_lens: - cur_len += l - lod.append(cur_len) - flattened_data = np.concatenate(data, axis=0).astype("int64") - flattened_data = flattened_data.reshape([len(flattened_data), 1]) - res = fluid.LoDTensor() - res.set(flattened_data, place) - res.set_lod([lod]) - return res - - -def main(): - BATCH_SIZE = 100 - PASS_NUM = 5 + accuracy = fluid.layers.accuracy(input=prediction, label=label) + return avg_cost, accuracy - word_dict = paddle.dataset.imdb.word_dict() - print "load word dict successfully" + +def main(word_dict, net_method, use_cuda): + if use_cuda and not fluid.core.is_compiled_with_cuda(): + return + + BATCH_SIZE = 128 + PASS_NUM = 5 dict_dim = len(word_dict) class_dim = 2 data = fluid.layers.data( name="words", shape=[1], dtype="int64", lod_level=1) label = fluid.layers.data(name="label", shape=[1], dtype="int64") - cost, accuracy, acc_out = stacked_lstm_net( + cost, acc_out = net_method( data, label, input_dim=dict_dim, class_dim=class_dim) train_data = paddle.batch( paddle.reader.shuffle( paddle.dataset.imdb.train(word_dict), buf_size=1000), batch_size=BATCH_SIZE) - place = fluid.CPUPlace() + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) feeder = fluid.DataFeeder(feed_list=[data, label], place=place) exe.run(fluid.default_startup_program()) for pass_id in xrange(PASS_NUM): - accuracy.reset(exe) for data in train_data(): cost_val, acc_val = exe.run(fluid.default_main_program(), feed=feeder.feed(data), fetch_list=[cost, acc_out]) - pass_acc = accuracy.eval(exe) - print("cost=" + str(cost_val) + " acc=" + str(acc_val) + - " pass_acc=" + str(pass_acc)) - if cost_val < 1.0 and acc_val > 0.8: - exit(0) - exit(1) + print("cost=" + str(cost_val) + " acc=" + str(acc_val)) + if cost_val < 0.4 and acc_val > 0.8: + return + raise AssertionError("Cost is too large for {0}".format( + net_method.__name__)) + + +class TestUnderstandSentiment(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.word_dict = paddle.dataset.imdb.word_dict() + + @contextlib.contextmanager + def new_program_scope(self): + prog = fluid.Program() + startup_prog = fluid.Program() + scope = fluid.core.Scope() + with fluid.scope_guard(scope): + with fluid.program_guard(prog, startup_prog): + yield + + def test_conv_cpu(self): + with self.new_program_scope(): + main(self.word_dict, net_method=convolution_net, use_cuda=False) + + def test_stacked_lstm_cpu(self): + with self.new_program_scope(): + main(self.word_dict, net_method=stacked_lstm_net, use_cuda=False) + + def test_conv_gpu(self): + with self.new_program_scope(): + main(self.word_dict, net_method=convolution_net, use_cuda=True) + + def test_stacked_lstm_gpu(self): + with self.new_program_scope(): + main(self.word_dict, net_method=stacked_lstm_net, use_cuda=True) if __name__ == '__main__': - main() + unittest.main() diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py deleted file mode 100644 index df27399dd215a579d7e3f8a1659180a06b1e7f64..0000000000000000000000000000000000000000 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_conv.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function -import numpy as np -import paddle.v2 as paddle -import paddle.v2.fluid as fluid - - -def convolution_net(data, label, input_dim, class_dim=2, emb_dim=32, - hid_dim=32): - emb = fluid.layers.embedding(input=data, size=[input_dim, emb_dim]) - conv_3 = fluid.nets.sequence_conv_pool( - input=emb, - num_filters=hid_dim, - filter_size=3, - act="tanh", - pool_type="sqrt") - conv_4 = fluid.nets.sequence_conv_pool( - input=emb, - num_filters=hid_dim, - filter_size=4, - act="tanh", - pool_type="sqrt") - prediction = fluid.layers.fc(input=[conv_3, conv_4], - size=class_dim, - act="softmax") - cost = fluid.layers.cross_entropy(input=prediction, label=label) - avg_cost = fluid.layers.mean(x=cost) - adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002) - adam_optimizer.minimize(avg_cost) - accuracy = fluid.evaluator.Accuracy(input=prediction, label=label) - return avg_cost, accuracy, accuracy.metrics[0] - - -def to_lodtensor(data, place): - seq_lens = [len(seq) for seq in data] - cur_len = 0 - lod = [cur_len] - for l in seq_lens: - cur_len += l - lod.append(cur_len) - flattened_data = np.concatenate(data, axis=0).astype("int64") - flattened_data = flattened_data.reshape([len(flattened_data), 1]) - res = fluid.LoDTensor() - res.set(flattened_data, place) - res.set_lod([lod]) - return res - - -def main(): - BATCH_SIZE = 100 - PASS_NUM = 5 - - word_dict = paddle.dataset.imdb.word_dict() - dict_dim = len(word_dict) - class_dim = 2 - - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1) - label = fluid.layers.data(name="label", shape=[1], dtype="int64") - cost, accuracy, acc_out = convolution_net( - data, label, input_dim=dict_dim, class_dim=class_dim) - - train_data = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.imdb.train(word_dict), buf_size=1000), - batch_size=BATCH_SIZE) - place = fluid.CPUPlace() - exe = fluid.Executor(place) - feeder = fluid.DataFeeder(feed_list=[data, label], place=place) - - exe.run(fluid.default_startup_program()) - - for pass_id in xrange(PASS_NUM): - accuracy.reset(exe) - for data in train_data(): - cost_val, acc_val = exe.run(fluid.default_main_program(), - feed=feeder.feed(data), - fetch_list=[cost, acc_out]) - pass_acc = accuracy.eval(exe) - print("cost=" + str(cost_val) + " acc=" + str(acc_val) + - " pass_acc=" + str(pass_acc)) - if cost_val < 1.0 and pass_acc > 0.8: - exit(0) - exit(1) - - -if __name__ == '__main__': - main() diff --git a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py b/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py deleted file mode 100644 index 117f74c59ad5bf6bb67711801cd7b9a41f39f1f8..0000000000000000000000000000000000000000 --- a/python/paddle/v2/fluid/tests/book/test_understand_sentiment_lstm.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import paddle.v2 as paddle -import paddle.v2.fluid as fluid -from paddle.v2.fluid.layer_helper import LayerHelper - - -def lstm(x, c_pre_init, hidden_dim, forget_bias=None): - """ - This function helps create an operator for the LSTM (Long Short Term - Memory) cell that can be used inside an RNN. - """ - helper = LayerHelper('lstm_unit', **locals()) - rnn = fluid.layers.StaticRNN() - with rnn.step(): - c_pre = rnn.memory(init=c_pre_init) - x_t = rnn.step_input(x) - - before_fc = fluid.layers.concat(input=[x_t, c_pre], axis=1) - after_fc = fluid.layers.fc(input=before_fc, size=hidden_dim * 4) - - dtype = x.dtype - c = helper.create_tmp_variable(dtype) - h = helper.create_tmp_variable(dtype) - - helper.append_op( - type='lstm_unit', - inputs={"X": after_fc, - "C_prev": c_pre}, - outputs={"C": c, - "H": h}, - attrs={"forget_bias": forget_bias}) - - rnn.update_memory(c_pre, c) - rnn.output(h) - - return rnn() - - -def lstm_net(dict_dim, class_dim=2, emb_dim=32, seq_len=80, batch_size=50): - data = fluid.layers.data( - name="words", - shape=[seq_len * batch_size, 1], - append_batch_size=False, - dtype="int64", - lod_level=1) - label = fluid.layers.data( - name="label", - shape=[batch_size, 1], - append_batch_size=False, - dtype="int64") - - emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim]) - emb = fluid.layers.reshape(x=emb, shape=[batch_size, seq_len, emb_dim]) - emb = fluid.layers.transpose(x=emb, perm=[1, 0, 2]) - - c_pre_init = fluid.layers.fill_constant( - dtype=emb.dtype, shape=[batch_size, emb_dim], value=0.0) - c_pre_init.stop_gradient = False - layer_1_out = lstm(emb, c_pre_init=c_pre_init, hidden_dim=emb_dim) - layer_1_out = fluid.layers.transpose(x=layer_1_out, perm=[1, 0, 2]) - - prediction = fluid.layers.fc(input=layer_1_out, - size=class_dim, - act="softmax") - cost = fluid.layers.cross_entropy(input=prediction, label=label) - - avg_cost = fluid.layers.mean(x=cost) - adam_optimizer = fluid.optimizer.Adam(learning_rate=0.002) - adam_optimizer.minimize(avg_cost) - acc = fluid.layers.accuracy(input=prediction, label=label) - - return avg_cost, acc - - -def to_lodtensor(data, place): - seq_lens = [len(seq) for seq in data] - cur_len = 0 - lod = [cur_len] - for l in seq_lens: - cur_len += l - lod.append(cur_len) - flattened_data = np.concatenate(data, axis=0).astype("int64") - flattened_data = flattened_data.reshape([len(flattened_data), 1]) - res = fluid.LoDTensor() - res.set(flattened_data, place) - res.set_lod([lod]) - return res - - -def chop_data(data, chop_len=80, batch_size=50): - data = [(x[0][:chop_len], x[1]) for x in data if len(x[0]) >= chop_len] - - return data[:batch_size] - - -def prepare_feed_data(data, place): - tensor_words = to_lodtensor(map(lambda x: x[0], data), place) - - label = np.array(map(lambda x: x[1], data)).astype("int64") - label = label.reshape([len(label), 1]) - tensor_label = fluid.LoDTensor() - tensor_label.set(label, place) - - return tensor_words, tensor_label - - -def main(): - BATCH_SIZE = 100 - PASS_NUM = 5 - - word_dict = paddle.dataset.imdb.word_dict() - print "load word dict successfully" - dict_dim = len(word_dict) - class_dim = 2 - - cost, acc = lstm_net(dict_dim=dict_dim, class_dim=class_dim) - - train_data = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.imdb.train(word_dict), buf_size=BATCH_SIZE * 10), - batch_size=BATCH_SIZE) - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - exe.run(fluid.default_startup_program()) - - for pass_id in xrange(PASS_NUM): - for data in train_data(): - chopped_data = chop_data(data) - tensor_words, tensor_label = prepare_feed_data(chopped_data, place) - - outs = exe.run(fluid.default_main_program(), - feed={"words": tensor_words, - "label": tensor_label}, - fetch_list=[cost, acc]) - cost_val = np.array(outs[0]) - acc_val = np.array(outs[1]) - - print("cost=" + str(cost_val) + " acc=" + str(acc_val)) - if acc_val > 0.7: - exit(0) - exit(1) - - -if __name__ == '__main__': - main() diff --git a/python/paddle/v2/fluid/tests/book/test_word2vec.py b/python/paddle/v2/fluid/tests/book/test_word2vec.py index 8cf54846fe5dba2742ce69e34e0788e124a1a85d..766ba9681d1bb816170e0458f540b32511c02933 100644 --- a/python/paddle/v2/fluid/tests/book/test_word2vec.py +++ b/python/paddle/v2/fluid/tests/book/test_word2vec.py @@ -12,76 +12,145 @@ # See the License for the specific language governing permissions and # limitations under the License. -import numpy as np import paddle.v2 as paddle import paddle.v2.fluid as fluid +import unittest +import os -PASS_NUM = 100 -EMBED_SIZE = 32 -HIDDEN_SIZE = 256 -N = 5 -BATCH_SIZE = 32 -IS_SPARSE = True - -word_dict = paddle.dataset.imikolov.build_dict() -dict_size = len(word_dict) - -first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64') -second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64') -third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64') -forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64') -next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') - -embed_first = fluid.layers.embedding( - input=first_word, - size=[dict_size, EMBED_SIZE], - dtype='float32', - is_sparse=IS_SPARSE, - param_attr='shared_w') -embed_second = fluid.layers.embedding( - input=second_word, - size=[dict_size, EMBED_SIZE], - dtype='float32', - is_sparse=IS_SPARSE, - param_attr='shared_w') -embed_third = fluid.layers.embedding( - input=third_word, - size=[dict_size, EMBED_SIZE], - dtype='float32', - is_sparse=IS_SPARSE, - param_attr='shared_w') -embed_forth = fluid.layers.embedding( - input=forth_word, - size=[dict_size, EMBED_SIZE], - dtype='float32', - is_sparse=IS_SPARSE, - param_attr='shared_w') - -concat_embed = fluid.layers.concat( - input=[embed_first, embed_second, embed_third, embed_forth], axis=1) -hidden1 = fluid.layers.fc(input=concat_embed, size=HIDDEN_SIZE, act='sigmoid') -predict_word = fluid.layers.fc(input=hidden1, size=dict_size, act='softmax') -cost = fluid.layers.cross_entropy(input=predict_word, label=next_word) -avg_cost = fluid.layers.mean(x=cost) -sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) -sgd_optimizer.minimize(avg_cost) - -train_reader = paddle.batch( - paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE) - -place = fluid.CPUPlace() -exe = fluid.Executor(place) -feeder = fluid.DataFeeder( - feed_list=[first_word, second_word, third_word, forth_word, next_word], - place=place) - -exe.run(fluid.default_startup_program()) - -for pass_id in range(PASS_NUM): - for data in train_reader(): - avg_cost_np = exe.run(fluid.default_main_program(), - feed=feeder.feed(data), - fetch_list=[avg_cost]) - if avg_cost_np[0] < 5.0: - exit(0) # if avg cost less than 10.0, we think our code is good. -exit(1) + +def main(use_cuda, is_sparse, parallel): + if use_cuda and not fluid.core.is_compiled_with_cuda(): + return + + PASS_NUM = 100 + EMBED_SIZE = 32 + HIDDEN_SIZE = 256 + N = 5 + BATCH_SIZE = 32 + IS_SPARSE = is_sparse + + def __network__(words): + embed_first = fluid.layers.embedding( + input=words[0], + size=[dict_size, EMBED_SIZE], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='shared_w') + embed_second = fluid.layers.embedding( + input=words[1], + size=[dict_size, EMBED_SIZE], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='shared_w') + embed_third = fluid.layers.embedding( + input=words[2], + size=[dict_size, EMBED_SIZE], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='shared_w') + embed_forth = fluid.layers.embedding( + input=words[3], + size=[dict_size, EMBED_SIZE], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='shared_w') + + concat_embed = fluid.layers.concat( + input=[embed_first, embed_second, embed_third, embed_forth], axis=1) + hidden1 = fluid.layers.fc(input=concat_embed, + size=HIDDEN_SIZE, + act='sigmoid') + predict_word = fluid.layers.fc(input=hidden1, + size=dict_size, + act='softmax') + cost = fluid.layers.cross_entropy(input=predict_word, label=words[4]) + avg_cost = fluid.layers.mean(x=cost) + return avg_cost + + word_dict = paddle.dataset.imikolov.build_dict() + dict_size = len(word_dict) + + first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64') + second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64') + third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64') + forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64') + next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') + + if not parallel: + avg_cost = __network__( + [first_word, second_word, third_word, forth_word, next_word]) + else: + places = fluid.layers.get_places() + pd = fluid.layers.ParallelDo(places) + with pd.do(): + avg_cost = __network__( + map(pd.read_input, [ + first_word, second_word, third_word, forth_word, next_word + ])) + pd.write_output(avg_cost) + + avg_cost = fluid.layers.mean(x=pd()) + + sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) + sgd_optimizer.minimize(avg_cost) + + train_reader = paddle.batch( + paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE) + + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + exe = fluid.Executor(place) + feeder = fluid.DataFeeder( + feed_list=[first_word, second_word, third_word, forth_word, next_word], + place=place) + + exe.run(fluid.default_startup_program()) + + for pass_id in range(PASS_NUM): + for data in train_reader(): + avg_cost_np = exe.run(fluid.default_main_program(), + feed=feeder.feed(data), + fetch_list=[avg_cost]) + if avg_cost_np[0] < 5.0: + return + raise AssertionError("Cost is too large {0:2.2}".format(avg_cost_np[0])) + + +FULL_TEST = os.getenv('FULL_TEST', + '0').lower() in ['true', '1', 't', 'y', 'yes', 'on'] +SKIP_REASON = "Only run minimum number of tests in CI server, to make CI faster" + + +class W2VTest(unittest.TestCase): + pass + + +def inject_test_method(use_cuda, is_sparse, parallel): + fn_name = "test_{0}_{1}_{2}".format("cuda" if use_cuda else "cpu", "sparse" + if is_sparse else "dense", "parallel" + if parallel else "normal") + + def __impl__(*args, **kwargs): + prog = fluid.Program() + startup_prog = fluid.Program() + scope = fluid.core.Scope() + with fluid.scope_guard(scope): + with fluid.program_guard(prog, startup_prog): + main(use_cuda=use_cuda, is_sparse=is_sparse, parallel=parallel) + + if use_cuda and is_sparse and parallel: + fn = __impl__ + else: + # skip the other test when on CI server + fn = unittest.skipUnless( + condition=FULL_TEST, reason=SKIP_REASON)(__impl__) + + setattr(W2VTest, fn_name, fn) + + +for use_cuda in (False, True): + for is_sparse in (False, True): + for parallel in (False, True): + inject_test_method(use_cuda, is_sparse, parallel) + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_dropout_op.py b/python/paddle/v2/fluid/tests/test_dropout_op.py index 107b9567dc4a8539532c2fff40df437cc72cc163..b0c55df9f58834688846c5362113464996eb286a 100644 --- a/python/paddle/v2/fluid/tests/test_dropout_op.py +++ b/python/paddle/v2/fluid/tests/test_dropout_op.py @@ -21,7 +21,7 @@ class TestDropoutOp(OpTest): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64)).astype("float32")} - self.attrs = {'dropout_prob': 0.0, 'is_test': False} + self.attrs = {'dropout_prob': 0.0, 'fix_seed': True, 'is_test': False} self.outputs = { 'Out': self.inputs['X'], 'Mask': np.ones((32, 64)).astype('float32') @@ -38,7 +38,7 @@ class TestDropoutOp2(TestDropoutOp): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64)).astype("float32")} - self.attrs = {'dropout_prob': 1.0, 'is_test': False} + self.attrs = {'dropout_prob': 1.0, 'fix_seed': True, 'is_test': False} self.outputs = { 'Out': np.zeros((32, 64)).astype('float32'), 'Mask': np.zeros((32, 64)).astype('float32') @@ -49,7 +49,7 @@ class TestDropoutOp3(TestDropoutOp): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64, 2)).astype("float32")} - self.attrs = {'dropout_prob': 0.0, 'is_test': False} + self.attrs = {'dropout_prob': 0.0, 'fix_seed': True, 'is_test': False} self.outputs = { 'Out': self.inputs['X'], 'Mask': np.ones((32, 64, 2)).astype('float32') @@ -60,7 +60,7 @@ class TestDropoutOp4(OpTest): def setUp(self): self.op_type = "dropout" self.inputs = {'X': np.random.random((32, 64)).astype("float32")} - self.attrs = {'dropout_prob': 0.35, 'is_test': True} + self.attrs = {'dropout_prob': 0.35, 'fix_seed': True, 'is_test': True} self.outputs = { 'Out': self.inputs['X'] * (1.0 - self.attrs['dropout_prob']) } diff --git a/python/paddle/v2/fluid/tests/test_elementwise_pow_op.py b/python/paddle/v2/fluid/tests/test_elementwise_pow_op.py new file mode 100644 index 0000000000000000000000000000000000000000..e31749df9baf10215fcd0cca3c1097f00c163ec7 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_elementwise_pow_op.py @@ -0,0 +1,43 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import unittest +import numpy as np +from op_test import OpTest + + +class TestElementwisePowOp(OpTest): + def setUp(self): + self.op_type = "elementwise_pow" + self.inputs = { + 'X': np.random.uniform(0.1, 1, [13, 17]).astype("float32"), + 'Y': np.random.uniform(0.1, 1, [13, 17]).astype("float32") + } + self.outputs = {'Out': np.power(self.inputs['X'], self.inputs['Y'])} + + def test_check_output(self): + self.check_output() + + +class TestElementwisePowOp_scalar(TestElementwisePowOp): + def setUp(self): + self.op_type = "elementwise_pow" + self.inputs = { + 'X': np.random.rand(2, 3, 4).astype('float32'), + 'Y': np.random.rand(1).astype('float32') + } + self.outputs = {'Out': np.power(self.inputs['X'], self.inputs['Y'])} + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_label_smooth_op.py b/python/paddle/v2/fluid/tests/test_label_smooth_op.py new file mode 100644 index 0000000000000000000000000000000000000000..19a4df57446c0c83b415909df3e0246bf2716881 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_label_smooth_op.py @@ -0,0 +1,55 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +from op_test import OpTest + + +class TestLabelSmoothOp(OpTest): + def config(self): + self.op_type = "label_smooth" + self.epsilon = 0.1 + batch_size, self.label_dim = 5, 10 + self.label = np.zeros((batch_size, self.label_dim)).astype("float64") + nonzero_index = np.random.randint(self.label_dim, size=(batch_size)) + self.label[np.arange(batch_size), nonzero_index] = 1 + + def setUp(self): + self.config() + smoothed_label = (1 - self.epsilon + ) * self.label + self.epsilon / self.label_dim + self.inputs = {'X': self.label} + self.attrs = {'epsilon': self.epsilon} + self.outputs = {'Out': smoothed_label} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(["X"], "Out") + + +class TestLabelSmoothOpWithPriorDist(TestLabelSmoothOp): + def setUp(self): + self.config() + dist = np.random.random((1, self.label_dim)) + smoothed_label = (1 - self.epsilon) * self.label + self.epsilon * dist + self.inputs = {'X': self.label, 'PriorDist': dist} + self.attrs = {'epsilon': self.epsilon} + self.outputs = {'Out': smoothed_label} + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_layer_norm_op.py b/python/paddle/v2/fluid/tests/test_layer_norm_op.py new file mode 100644 index 0000000000000000000000000000000000000000..68cf8673cd46677065588f652482cd0df08b3450 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_layer_norm_op.py @@ -0,0 +1,252 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import unittest +import numpy as np + +from operator import mul +from op_test import OpTest +import paddle.v2.fluid.core as core +from paddle.v2.fluid.op import Operator +from paddle.v2.fluid.framework import grad_var_name + + +def _reference_layer_norm_naive(x, scale, beta, epsilon, begin_norm_axis=1): + x_shape = x.shape + N = reduce(mul, x_shape[0:begin_norm_axis], 1) + D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1) + x.shape = [N, D] + + mean = np.mean(x, axis=1) + var = np.var(x, axis=1) + epsilon + output = scale.reshape([1, D]) * np.divide( + (x - mean.reshape([N, 1])), + (np.sqrt(var)).reshape([N, 1])) + beta.reshape([1, D]) + + x.shape, output.shape = x_shape, x_shape + return output, mean, var + + +def _reference_layer_norm_grad(x, grad_y, scale, mean, var, begin_norm_axis=1): + x_shape = x.shape + scale_shape = scale.shape + N = reduce(mul, x_shape[0:begin_norm_axis], 1) + D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1) + x.shape, grad_y.shape = [N, D], [N, D] + var.shape, mean.shape = [N, 1], [N, 1] + scale.shape = [1, D] + + # d_bias + d_bias = np.sum(grad_y, axis=0).reshape([1, D]) + # d_scale + d_scale = np.sum(((x - mean) * np.sqrt(1 / var)) * grad_y, + axis=0).reshape([1, D]) + # dx + dx_end = scale * np.sqrt(1.0 / var) * grad_y + d_mean_0 = np.sum(-np.sqrt(1.0 / var) * grad_y * scale, axis=1).reshape( + [N, 1]) # the second part equals to zero. + d_mean = 1.0 / D * d_mean_0 + d_std = np.sum( + -(1.0 / var) * (x - mean) * grad_y * scale, axis=1).reshape([N, 1]) * ( + 1.0 / D * np.sqrt(1.0 / var).reshape([N, 1]) * (x - mean)) + + grad_x = dx_end + d_mean + d_std + + grad_y.shape = x_shape + x.shape = x_shape + scale.shape = scale_shape + return grad_x, d_scale, d_bias + + +def get_backward_op(scope, op, no_grad_set): + backward_op = core.Operator.backward(op, no_grad_set) + for input in backward_op.input_vars(): + var = scope.var(input) + var.get_tensor() + for output in backward_op.output_vars(): + var = scope.var(output) + var.get_tensor() + return backward_op + + +def create_or_get_tensor(scope, var_name, var, place): + tensor = scope.var(var_name).get_tensor() + if var is not None: + assert isinstance(var, np.ndarray) + tensor.set_lod([[]]) + tensor.set_dims(var.shape) + tensor.set(var, place) + return tensor + + +def set_output_grad(scope, outputs, place, feed_dict=None): + def __set_tensor__(name, data=None): + out_tensor = scope.find_var(name).get_tensor() + grad_tensor = scope.var(grad_var_name(name)).get_tensor() + out_dtype = out_tensor.dtype() + if data is None: + if out_dtype == core.DataType.FP64: + data = np.ones(out_tensor.shape(), dtype=np.float64) + elif out_dtype == core.DataType.FP32: + data = np.ones(out_tensor.shape(), dtype=np.float32) + else: + raise ValueError("Not supported data type " + str(out_dtype)) + grad_tensor.set(data, place) + + for output in outputs: + data = None + if output in feed_dict: + data = feed_dict[output] + __set_tensor__(output, data) + + +class TestLayerNormdOp(OpTest): + def __assert_close(self, tensor, np_array, msg, atol=1e-4): + self.assertTrue( + np.allclose( + np.array(tensor).reshape(np_array.shape), np_array, atol=atol), + msg) + + def __assert_grad_close(self, + tensor, + np_array, + name, + place, + max_relative_error=0.02): + a = np.array(tensor).reshape(np_array.shape) + b = np_array + abs_a = np.abs(a) + abs_a[abs_a < 1e-5] = 1 + + diff_mat = np.abs(a - b) / abs_a + max_diff = np.max(diff_mat) + + def err_msg(): + offset = np.argmax(diff_mat > max_relative_error) + return ("%s Variable %s max gradient diff %f over limit %f, " + "the first error element is %d, %f, %f") % ( + "Gradient Check On %s" % str(place), name, max_diff, + max_relative_error, offset, a.flatten()[offset], + b.flatten()[offset]) + + self.assertLessEqual(max_diff, max_relative_error, err_msg()) + + def check_forward_backward(self, shape, begin_norm_axis): + def test_with_place(place, shape, begin_norm_axis=1): + # setUp + assert begin_norm_axis > 0 and begin_norm_axis < len( + shape), 'begin_norm_axis must be between 0 and len(shape)-1.' + # attr + epsilon = 0.00001 + x_shape = shape + D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1) + scale_shape = [D] + np.random.random(123) + x_val = np.random.random_sample(x_shape).astype(np.float32) + scale_val = np.random.random_sample(scale_shape).astype(np.float32) + bias_val = np.random.random_sample(scale_shape).astype(np.float32) + y_grad = np.random.random_sample(x_shape).astype(np.float32) + + # run forward + y_out, saved_mean, var_ref = _reference_layer_norm_naive( + x_val, scale_val, bias_val, epsilon, begin_norm_axis) + naive_fw = {"Y": y_out, "Mean": saved_mean, "Variance": var_ref} + + # get gradient + x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_layer_norm_grad( + x_val, y_grad, scale_val, saved_mean, var_ref, begin_norm_axis) + naive_grad = { + "X": x_grad_ref, + "Scale": scale_grad_ref, + "Bias": bias_grad_ref + } + + scope = core.Scope() + + # create input + input_map = {"X": x_val, "Scale": scale_val, "Bias": bias_val} + for i_name in input_map: + create_or_get_tensor(scope, i_name, input_map[i_name], place) + + # create output + output_map = {"Y": None, "Mean": None, "Variance": None} + output_tensor = {} + for o_name in output_map: + output_tensor[o_name] = create_or_get_tensor( + scope, o_name, output_map[o_name], place) + + layer_norm_op = Operator( + "layer_norm", + # inputs + X="X", + Scale="Scale", + Bias="Bias", + # outputs + Y="Y", + Mean="Mean", + Variance="Variance", + # attrs + epsilon=epsilon, + begin_norm_axis=begin_norm_axis) + + layer_norm_op.run(scope, place) + + # check forward result + atol = 5e-2 if isinstance(place, core.CUDAPlace) else 1e-4 + for o_tensor in output_tensor: + self.__assert_close(output_tensor[o_tensor], naive_fw[o_tensor], + o_tensor, atol) + + # run backward + layer_norm_op_grad = get_backward_op(scope, layer_norm_op, set()) + set_output_grad( + scope, ["Y", "Mean", "Variance"], + place, + feed_dict={"Y": y_grad}) + layer_norm_op_grad.run(scope, place) + + # get output + grad_tensor = {} + for o_name in naive_grad: + grad_tensor[o_name] = x_ = create_or_get_tensor( + scope, grad_var_name(o_name), None, place) + + # check gradient output + for o_grad in naive_grad: + self.__assert_grad_close(grad_tensor[o_grad], + naive_grad[o_grad], o_grad + "@GRAD", + place) + + places = [core.CPUPlace()] + if core.is_compiled_with_cuda() and core.op_support_gpu("layer_norm"): + places.append(core.CUDAPlace(0)) + + for place in places: + test_with_place(place, shape, begin_norm_axis) + + def test_check_forward_backward_with_scale_and_bias(self): + self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=1) + self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=3) + + def test_check_forward_backward_with_scale(self): + pass # TODO(zcd) + + def test_check_forward_backward_with_bias(self): + pass # TODO(zcd) + + def test_check_forward_backward(self): + pass # TODO(zcd) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_learning_rate_decay.py b/python/paddle/v2/fluid/tests/test_learning_rate_decay.py new file mode 100644 index 0000000000000000000000000000000000000000..dc348cf2d21693290095900f8ab63c29923b4673 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_learning_rate_decay.py @@ -0,0 +1,110 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import math +import paddle.v2.fluid.framework as framework +import paddle.v2.fluid as fluid +import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.learning_rate_decay as lr_decay + + +def exponential_decay(learning_rate, + global_step, + decay_steps, + decay_rate, + staircase=False): + exponent = float(global_step) / float(decay_steps) + if staircase: + exponent = math.floor(exponent) + return learning_rate * decay_rate**exponent + + +def natural_exp_decay(learning_rate, + global_step, + decay_steps, + decay_rate, + staircase=False): + exponent = float(global_step) / float(decay_steps) + if staircase: + exponent = math.floor(exponent) + return learning_rate * math.exp(-1 * decay_rate * exponent) + + +def inverse_time_decay(learning_rate, + global_step, + decay_steps, + decay_rate, + staircase=False): + temp = float(global_step) / float(decay_steps) + if staircase: + temp = math.floor(temp) + return learning_rate / (1 + decay_rate * temp) + + +class TestLearningRateDecay(unittest.TestCase): + def check_decay(self, python_decay_fn, fluid_decay_fn, staircase): + init_lr = 1.0 + decay_steps = 5 + decay_rate = 0.5 + + global_step = layers.create_global_var( + shape=[1], value=0.0, dtype='float32', persistable=True) + + decayed_lr = fluid_decay_fn( + learning_rate=init_lr, + global_step=global_step, + decay_steps=decay_steps, + decay_rate=decay_rate, + staircase=staircase) + layers.increment(global_step, 1.0) + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + + exe.run(fluid.default_startup_program()) + for step in range(10): + step_val, lr_val = exe.run(fluid.default_main_program(), + feed=[], + fetch_list=[global_step, decayed_lr]) + python_decayed_lr = python_decay_fn( + learning_rate=init_lr, + global_step=step, + decay_steps=decay_steps, + decay_rate=decay_rate, + staircase=staircase) + self.assertAlmostEqual(python_decayed_lr, lr_val[0]) + + def test_decay(self): + decay_fns = [ + (exponential_decay, lr_decay.exponential_decay, True), + (exponential_decay, lr_decay.exponential_decay, False), + (natural_exp_decay, lr_decay.natural_exp_decay, True), + (natural_exp_decay, lr_decay.natural_exp_decay, False), + (inverse_time_decay, lr_decay.inverse_time_decay, True), + (inverse_time_decay, lr_decay.inverse_time_decay, False), + ] + + for py_decay_fn, fluid_decay_fn, staircase in decay_fns: + print("decay_fn=" + str(py_decay_fn) + " staircase=" + str( + staircase)) + main_program = framework.Program() + startup_program = framework.Program() + with framework.program_guard(main_program, startup_program): + self.check_decay(py_decay_fn, fluid_decay_fn, staircase) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_tensor.py b/python/paddle/v2/fluid/tests/test_tensor.py index d5cc235f588ad37b0d1293dc9894952c97411757..0219bef42b3ba133dda7412c1036cf989a170a36 100644 --- a/python/paddle/v2/fluid/tests/test_tensor.py +++ b/python/paddle/v2/fluid/tests/test_tensor.py @@ -108,9 +108,31 @@ class TestTensor(unittest.TestCase): scope = core.Scope() place = core.CPUPlace() lod_py = [[0, 2, 5], [0, 2, 4, 5]] - lod_tensor = core.LoDTensor(lod_py) + lod_tensor = core.LoDTensor() lod_tensor.set_dims([5, 2, 3, 4]) + lod_tensor.set_lod(lod_py) + lod_tensor.alloc_float(place) + tensor_array = numpy.array(lod_tensor) + tensor_array[0, 0, 0, 0] = 1.0 + tensor_array[0, 0, 0, 1] = 2.0 + lod_tensor.set(tensor_array, place) + + lod_v = numpy.array(lod_tensor) + self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0]) + self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1]) + self.assertListEqual(lod_py, lod_tensor.lod()) + + def test_lod_tensor_gpu_init(self): + if not core.is_compiled_with_cuda(): + return + scope = core.Scope() + place = core.CUDAPlace(0) + lod_py = [[0, 2, 5], [0, 2, 4, 5]] + lod_tensor = core.LoDTensor() + + lod_tensor.set_dims([5, 2, 3, 4]) + lod_tensor.set_lod(lod_py) lod_tensor.alloc_float(place) tensor_array = numpy.array(lod_tensor) tensor_array[0, 0, 0, 0] = 1.0 diff --git a/python/paddle/v2/fluid/tests/test_weight_normalization.py b/python/paddle/v2/fluid/tests/test_weight_normalization.py new file mode 100644 index 0000000000000000000000000000000000000000..80ad8285d8a3c2ced814cc3588a814c14ec60855 --- /dev/null +++ b/python/paddle/v2/fluid/tests/test_weight_normalization.py @@ -0,0 +1,121 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy +import collections +import paddle.v2.fluid as fluid +import paddle.v2.fluid.core as core +from paddle.v2.fluid.initializer import ConstantInitializer +from paddle.v2.fluid.param_attr import WeightNormParamAttr + + +class TestWeightNormalization(unittest.TestCase): + batch_size = 3 + hidden_size = 5 + data_desc = (['x', [10], 0], ) + + @classmethod + def setUpClass(cls): + cls.set_program() + + @classmethod + def set_program(cls): + data = fluid.layers.data( + name=cls.data_desc[0][0], shape=cls.data_desc[0][1]) + out = fluid.layers.fc(input=data, + size=cls.hidden_size, + param_attr=WeightNormParamAttr( + dim=None, + name='weight_norm_param', + initializer=ConstantInitializer(1.0)), + bias_attr=False, + act=None) + loss = fluid.layers.reduce_sum(out) + fluid.backward.append_backward(loss=loss) + cls.fetch_list = [ + 'weight_norm_param_g', 'weight_norm_param_v', + 'weight_norm_param_g@GRAD' + ] + + def run_program(self): + outputs = [] + places = [core.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(core.CUDAPlace(0)) + for place in places: + self.set_inputs(place) + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + output = exe.run(fluid.default_main_program(), + feed=self.inputs, + fetch_list=self.fetch_list, + return_numpy=False) + outputs.append(output) + self.actual_outputs = outputs + + def set_data(self): + self.data = collections.OrderedDict() + for desc in self.data_desc: + data_name = desc[0] + data_shape = desc[1] + data_lod_level = desc[2] + data_lod = [] + for i in range(data_lod_level): + lod_level_i = numpy.random.randint( + low=1, + high=5, + size=self.batch_size if i == 0 else lod_level_i[-1]) + lod_level_i = [0] + numpy.cumsum(lod_level_i).tolist() + data_lod.append(lod_level_i) + data_value = numpy.random.random( + size=[data_lod[-1][-1] if data_lod else self.batch_size + ] + data_shape).astype('float32') + self.data[data_name] = (data_value, data_lod) + + def set_inputs(self, place): + self.inputs = {} + for desc in self.data_desc: + tensor = fluid.Tensor() + tensor.set(self.data[desc[0]][0], place) + if self.data[desc[0]][1]: + tensor.set_lod(self.data[desc[0]][1]) + self.inputs[desc[0]] = tensor + + def weight_normalize(self): + v = numpy.ones((self.data[self.data_desc[0][0]][0].shape[-1], + self.hidden_size)) + g = numpy.linalg.norm(v, axis=None, keepdims=True) + w = g * v / numpy.linalg.norm(v, axis=None, keepdims=True) + x = self.data[self.data_desc[0][0]][0] + out = numpy.dot(x, w) + g_grad = (numpy.dot(x.T, numpy.ones_like(out)) * (v / numpy.linalg.norm( + v, axis=None, keepdims=True))).sum(axis=None, keepdims=True) + return g, v, g_grad + + def test_weight_normalization(self): + self.set_data() + self.run_program() + expect_output = self.weight_normalize() + for actual_output in self.actual_outputs: + [ + self.assertTrue( + numpy.allclose( + numpy.array(actual), expect, atol=0.001)) + for expect, actual in zip(expect_output, actual_output) + ] + + +if __name__ == '__main__': + unittest.main()