diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 23d75b749998613e2e6e4acdf0e0d6438079e8e1..5d436a7e0c3752c889c19820507589f34d3bee94 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -190,18 +190,18 @@ op_library(cos_sim_op DEPS cos_sim_functor) op_library(parallel_do_op DEPS executor) if (WITH_GPU) - op_library(conv_op DEPS vol2col depthwise_conv) + op_library(conv_op DEPS vol2col depthwise_conv im2col) else() - op_library(conv_op DEPS vol2col) + op_library(conv_op DEPS vol2col im2col) endif() -op_library(conv_transpose_op DEPS vol2col) +op_library(conv_transpose_op DEPS vol2col im2col) # FIXME(typhoonzero): save/load depends lodtensor serialization functions op_library(save_op DEPS lod_tensor) op_library(load_op DEPS lod_tensor) op_library(save_combine_op DEPS lod_tensor) op_library(load_combine_op DEPS lod_tensor) -op_library(concat_op DEPS concat_functor) +op_library(concat_op DEPS concat) list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) foreach(src ${GENERAL_OPS}) diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt index 751e69b1c89dd2f69b43758fe66adf39846a8ab2..a181d802262d15b188060dae4330cec0e24714ab 100644 --- a/paddle/fluid/operators/math/CMakeLists.txt +++ b/paddle/fluid/operators/math/CMakeLists.txt @@ -1,49 +1,59 @@ add_subdirectory(detail) -if(WITH_GPU) - nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc im2col.cu DEPS cblas device_context framework_proto) - nv_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function tensor) - nv_library(selected_rows_functor SRCS selected_rows_functor.cc selected_rows_functor.cu DEPS selected_rows math_function) - nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor) - nv_library(softmax SRCS softmax.cc softmax.cu DEPS device_context) - nv_library(cross_entropy SRCS cross_entropy.cc cross_entropy.cu DEPS device_context) - nv_library(pooling SRCS pooling.cc pooling.cu DEPS device_context) - nv_library(depthwise_conv SRCS depthwise_conv.cu DEPS device_context) - nv_library(sequence_pooling SRCS sequence_pooling.cc sequence_pooling.cu DEPS device_context math_function) - nv_library(vol2col SRCS vol2col.cc vol2col.cu DEPS device_context tensor) - nv_library(context_project SRCS context_project.cc context_project.cu DEPS device_context math_function) - nv_library(sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context tensor math_function) - nv_library(sequence_padding SRCS sequence_padding.cc sequence_padding.cu DEPS lod_tensor device_context) - nv_library(sequence_scale SRCS sequence_scale.cc sequence_scale.cu DEPS lod_tensor device_context) - nv_library(lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions) - nv_library(maxouting SRCS maxouting.cc maxouting.cu DEPS device_context) - nv_library(unpooling SRCS unpooling.cc unpooling.cu DEPS device_context) - nv_library(gru_compute SRCS gru_compute.cc gru_compute.cu DEPS device_context activation_functions math_function) - nv_library(cos_sim_functor SRCS cos_sim_functor.cc cos_sim_functor.cu DEPS device_context) - nv_library(concat_functor SRCS concat.cc concat.cu DEPS device_context tensor) -else() - cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context framework_proto) - cc_library(selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function) - cc_library(softmax SRCS softmax.cc DEPS device_context) - cc_library(cross_entropy SRCS cross_entropy.cc DEPS device_context) - cc_library(pooling SRCS pooling.cc DEPS device_context) - cc_library(sequence_pooling SRCS sequence_pooling.cc DEPS device_context math_function) - cc_library(vol2col SRCS vol2col.cc DEPS device_context tensor) - cc_library(context_project SRCS context_project.cc DEPS device_context math_function) - cc_library(sequence2batch SRCS sequence2batch.cc DEPS device_context tensor math_function) - cc_library(sequence_padding SRCS sequence_padding.cc DEPS lod_tensor device_context) - cc_library(sequence_scale SRCS sequence_scale.cc DEPS lod_tensor device_context) - cc_library(lstm_compute SRCS lstm_compute.cc DEPS device_context activation_functions) - cc_library(maxouting SRCS maxouting.cc DEPS device_context) - cc_library(unpooling SRCS unpooling.cc DEPS device_context) - cc_library(gru_compute SRCS gru_compute.cc DEPS device_context activation_functions math_function) - cc_library(cos_sim_functor SRCS cos_sim_functor.cc DEPS device_context) - cc_library(concat_functor SRCS concat.cc DEPS device_context tensor) -endif() +function(math_library TARGET) + # math_library is a function to create math library. + # The interface is the same as cc_library. + # But it handle split GPU/CPU code and link some common library. + set(cc_srcs) + set(cu_srcs) + set(math_common_deps device_context framework_proto) + set(multiValueArgs DEPS) + cmake_parse_arguments(math_library "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN}) + + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc) + list(APPEND cc_srcs ${TARGET}.cc) + endif() + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu) + list(APPEND cu_srcs ${TARGET}.cu) + endif() + + list(LENGTH cc_srcs cc_srcs_len) + if (WITH_GPU) + nv_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs} DEPS ${math_library_DEPS} ${math_common_deps}) + elseif(${cc_srcs_len} GREATER 0) + cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${math_library_DEPS} ${math_common_deps}) + endif() +endfunction() -cc_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor) +# please add new math_library in alphabetical order +math_library(concat) +math_library(context_project DEPS im2col math_function) +math_library(cross_entropy) +math_library(cos_sim_functor) +math_library(depthwise_conv) +math_library(gru_compute DEPS activation_functions math_function) +math_library(im2col) +math_library(lstm_compute DEPS activation_functions) +math_library(math_function DEPS cblas) +math_library(maxouting) +math_library(pooling) +math_library(selected_rows_functor DEPS selected_rows) +math_library(sequence2batch) +math_library(sequence_padding) +math_library(sequence_pooling DEPS math_function) +math_library(sequence_scale) +math_library(softmax) +math_library(unpooling) +math_library(vol2col) + +cc_test(math_function_test SRCS math_function_test.cc) cc_test(selected_rows_functor_test SRCS selected_rows_functor_test.cc DEPS selected_rows_functor) -cc_test(im2col_test SRCS im2col_test.cc DEPS math_function tensor) -cc_test(vol2col_test SRCS vol2col_test.cc DEPS vol2col tensor) +cc_test(im2col_test SRCS im2col_test.cc DEPS im2col) +cc_test(vol2col_test SRCS vol2col_test.cc DEPS vol2col) cc_test(sequence_padding_test SRCS sequence_padding_test.cc DEPS sequence_padding) -cc_test(concat_test SRCS concat_test.cc DEPS concat_functor tensor) +if(WITH_GPU) + nv_test(math_function_gpu_test SRCS math_function_test.cu) + nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor) +endif() +cc_test(concat_test SRCS concat_test.cc DEPS concat) diff --git a/paddle/fluid/operators/math/sequence2batch.cc b/paddle/fluid/operators/math/sequence2batch.cc index 72bf2ab17016157f22bcf180090660346ed4f60d..8899abff360ea867872d3433722cdb37ef358500 100644 --- a/paddle/fluid/operators/math/sequence2batch.cc +++ b/paddle/fluid/operators/math/sequence2batch.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/sequence2batch.h" -#include "paddle/fluid/operators/math/math_function.h" namespace paddle { namespace operators { diff --git a/python/paddle/fluid/memory_optimization_transpiler.py b/python/paddle/fluid/memory_optimization_transpiler.py index 708ca08b17c85efbb25ecaf2580b7141421e25b9..4fa2d03ef563b98b2eec576bf87d4b2e54ca0a36 100644 --- a/python/paddle/fluid/memory_optimization_transpiler.py +++ b/python/paddle/fluid/memory_optimization_transpiler.py @@ -31,6 +31,8 @@ dtype_to_size = { sub_block_ops = ["while", "while_grad", "parallel_do", "parallel_do_grad"] +PRINT_LOG = False + class ControlFlowGraph(object): def __init__(self, Program, ops, forward_num, skip_opt): @@ -171,12 +173,14 @@ class ControlFlowGraph(object): # TODO(qijun): actually, we should compare dtype_to_size[x_dtype] # and dtype_to_size[cache_dtype] if x_dtype == cache_dtype: - print(("Hit Cache !!!! cache pool index " - "is %d, var name is %s, " - "cached var name is %s, " - "var shape is %s ") % - (index, x, cache_var, - str(cache_shape))) + if PRINT_LOG: + print( + ("Hit Cache !!!! cache pool index " + "is %d, var name is %s, " + "cached var name is %s, " + "var shape is %s ") % + (index, x, cache_var, + str(cache_shape))) self.pool.pop(index) if x == cache_var: break @@ -277,7 +281,9 @@ def _get_cfgs(input_program): return cfgs -def memory_optimize(input_program): +def memory_optimize(input_program, print_log=False): + global PRINT_LOG + PRINT_LOG = print_log cfgs = _get_cfgs(input_program) for cfg in cfgs: cfg.memory_optimize() diff --git a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py index 7648bb9fe1c8d2dae3590fa67f781743ba297c32..c9d2a5ecaab0669f308b5b9c5cf74d0212fa462a 100644 --- a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py +++ b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py @@ -49,7 +49,7 @@ avg_cost = fluid.layers.mean(x=cost) sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.01) sgd_optimizer.minimize(avg_cost) -fluid.memory_optimize(fluid.default_main_program()) +fluid.memory_optimize(fluid.default_main_program(), print_log=True) BATCH_SIZE = 200