Commit 732eebb2 authored by: Z zhouhanqing

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into Paddle-ReduceProd

@@ -190,18 +190,18 @@ op_library(cos_sim_op DEPS cos_sim_functor)
 op_library(parallel_do_op DEPS executor)

 if (WITH_GPU)
-    op_library(conv_op DEPS vol2col depthwise_conv)
+    op_library(conv_op DEPS vol2col depthwise_conv im2col)
 else()
-    op_library(conv_op DEPS vol2col)
+    op_library(conv_op DEPS vol2col im2col)
 endif()
-op_library(conv_transpose_op DEPS vol2col)
+op_library(conv_transpose_op DEPS vol2col im2col)

 # FIXME(typhoonzero): save/load depends lodtensor serialization functions
 op_library(save_op DEPS lod_tensor)
 op_library(load_op DEPS lod_tensor)
 op_library(save_combine_op DEPS lod_tensor)
 op_library(load_combine_op DEPS lod_tensor)
-op_library(concat_op DEPS concat_functor)
+op_library(concat_op DEPS concat)

 list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS})
 foreach(src ${GENERAL_OPS})
......
 add_subdirectory(detail)
-if(WITH_GPU)
-    nv_library(math_function SRCS math_function.cc math_function.cu im2col.cc im2col.cu DEPS cblas device_context framework_proto)
-    nv_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function tensor)
-    nv_library(selected_rows_functor SRCS selected_rows_functor.cc selected_rows_functor.cu DEPS selected_rows math_function)
-    nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor)
-    nv_library(softmax SRCS softmax.cc softmax.cu DEPS device_context)
-    nv_library(cross_entropy SRCS cross_entropy.cc cross_entropy.cu DEPS device_context)
-    nv_library(pooling SRCS pooling.cc pooling.cu DEPS device_context)
-    nv_library(depthwise_conv SRCS depthwise_conv.cu DEPS device_context)
-    nv_library(sequence_pooling SRCS sequence_pooling.cc sequence_pooling.cu DEPS device_context math_function)
-    nv_library(vol2col SRCS vol2col.cc vol2col.cu DEPS device_context tensor)
-    nv_library(context_project SRCS context_project.cc context_project.cu DEPS device_context math_function)
-    nv_library(sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context tensor math_function)
-    nv_library(sequence_padding SRCS sequence_padding.cc sequence_padding.cu DEPS lod_tensor device_context)
-    nv_library(sequence_scale SRCS sequence_scale.cc sequence_scale.cu DEPS lod_tensor device_context)
-    nv_library(lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions)
-    nv_library(maxouting SRCS maxouting.cc maxouting.cu DEPS device_context)
-    nv_library(unpooling SRCS unpooling.cc unpooling.cu DEPS device_context)
-    nv_library(gru_compute SRCS gru_compute.cc gru_compute.cu DEPS device_context activation_functions math_function)
-    nv_library(cos_sim_functor SRCS cos_sim_functor.cc cos_sim_functor.cu DEPS device_context)
-    nv_library(concat_functor SRCS concat.cc concat.cu DEPS device_context tensor)
-else()
-    cc_library(math_function SRCS math_function.cc im2col.cc DEPS cblas device_context framework_proto)
-    cc_library(selected_rows_functor SRCS selected_rows_functor.cc DEPS selected_rows math_function)
-    cc_library(softmax SRCS softmax.cc DEPS device_context)
-    cc_library(cross_entropy SRCS cross_entropy.cc DEPS device_context)
-    cc_library(pooling SRCS pooling.cc DEPS device_context)
-    cc_library(sequence_pooling SRCS sequence_pooling.cc DEPS device_context math_function)
-    cc_library(vol2col SRCS vol2col.cc DEPS device_context tensor)
-    cc_library(context_project SRCS context_project.cc DEPS device_context math_function)
-    cc_library(sequence2batch SRCS sequence2batch.cc DEPS device_context tensor math_function)
-    cc_library(sequence_padding SRCS sequence_padding.cc DEPS lod_tensor device_context)
-    cc_library(sequence_scale SRCS sequence_scale.cc DEPS lod_tensor device_context)
-    cc_library(lstm_compute SRCS lstm_compute.cc DEPS device_context activation_functions)
-    cc_library(maxouting SRCS maxouting.cc DEPS device_context)
-    cc_library(unpooling SRCS unpooling.cc DEPS device_context)
-    cc_library(gru_compute SRCS gru_compute.cc DEPS device_context activation_functions math_function)
-    cc_library(cos_sim_functor SRCS cos_sim_functor.cc DEPS device_context)
-    cc_library(concat_functor SRCS concat.cc DEPS device_context tensor)
-endif()
-cc_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor)
+
+function(math_library TARGET)
+    # math_library is a function to create math library.
+    # The interface is the same as cc_library.
+    # But it handle split GPU/CPU code and link some common library.
+    set(cc_srcs)
+    set(cu_srcs)
+    set(math_common_deps device_context framework_proto)
+    set(multiValueArgs DEPS)
+    cmake_parse_arguments(math_library "${options}" "${oneValueArgs}"
+                          "${multiValueArgs}" ${ARGN})
+
+    if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc)
+        list(APPEND cc_srcs ${TARGET}.cc)
+    endif()
+    if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu)
+        list(APPEND cu_srcs ${TARGET}.cu)
+    endif()
+
+    list(LENGTH cc_srcs cc_srcs_len)
+    if (WITH_GPU)
+        nv_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs} DEPS ${math_library_DEPS} ${math_common_deps})
+    elseif(${cc_srcs_len} GREATER 0)
+        cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${math_library_DEPS} ${math_common_deps})
+    endif()
+endfunction()
+
+# please add new math_library in alphabetical order
+math_library(concat)
+math_library(context_project DEPS im2col math_function)
+math_library(cross_entropy)
+math_library(cos_sim_functor)
+math_library(depthwise_conv)
+math_library(gru_compute DEPS activation_functions math_function)
+math_library(im2col)
+math_library(lstm_compute DEPS activation_functions)
+math_library(math_function DEPS cblas)
+math_library(maxouting)
+math_library(pooling)
+math_library(selected_rows_functor DEPS selected_rows)
+math_library(sequence2batch)
+math_library(sequence_padding)
+math_library(sequence_pooling DEPS math_function)
+math_library(sequence_scale)
+math_library(softmax)
+math_library(unpooling)
+math_library(vol2col)
+
+cc_test(math_function_test SRCS math_function_test.cc)
 cc_test(selected_rows_functor_test SRCS selected_rows_functor_test.cc DEPS selected_rows_functor)
-cc_test(im2col_test SRCS im2col_test.cc DEPS math_function tensor)
-cc_test(vol2col_test SRCS vol2col_test.cc DEPS vol2col tensor)
+cc_test(im2col_test SRCS im2col_test.cc DEPS im2col)
+cc_test(vol2col_test SRCS vol2col_test.cc DEPS vol2col)
 cc_test(sequence_padding_test SRCS sequence_padding_test.cc DEPS sequence_padding)
-cc_test(concat_test SRCS concat_test.cc DEPS concat_functor tensor)
+if(WITH_GPU)
+    nv_test(math_function_gpu_test SRCS math_function_test.cu)
+    nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor)
+endif()
+cc_test(concat_test SRCS concat_test.cc DEPS concat)
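For illustration only, a minimal sketch of how an additional math target would be registered under the new math_library helper; the target name my_functor, its sources, and the accompanying test are hypothetical and not part of this commit:

    # Hypothetical target (not in this commit). math_library looks for
    # my_functor.cc and my_functor.cu next to this CMakeLists.txt; with
    # WITH_GPU it builds an nv_library from both sources, otherwise a
    # cc_library from the .cc only, always adding device_context and
    # framework_proto to the listed DEPS.
    math_library(my_functor DEPS math_function)
    cc_test(my_functor_test SRCS my_functor_test.cc DEPS my_functor)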
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/operators/math/sequence2batch.h"
-#include "paddle/fluid/operators/math/math_function.h"

 namespace paddle {
 namespace operators {
......
@@ -31,6 +31,8 @@ dtype_to_size = {

 sub_block_ops = ["while", "while_grad", "parallel_do", "parallel_do_grad"]

+PRINT_LOG = False
+

 class ControlFlowGraph(object):
     def __init__(self, Program, ops, forward_num, skip_opt):
@@ -171,12 +173,14 @@ class ControlFlowGraph(object):
                     # TODO(qijun): actually, we should compare dtype_to_size[x_dtype]
                     # and dtype_to_size[cache_dtype]
                     if x_dtype == cache_dtype:
-                        print(("Hit Cache !!!! cache pool index "
-                               "is %d, var name is %s, "
-                               "cached var name is %s, "
-                               "var shape is %s ") %
-                              (index, x, cache_var,
-                               str(cache_shape)))
+                        if PRINT_LOG:
+                            print(
+                                ("Hit Cache !!!! cache pool index "
+                                 "is %d, var name is %s, "
+                                 "cached var name is %s, "
+                                 "var shape is %s ") %
+                                (index, x, cache_var,
+                                 str(cache_shape)))
                         self.pool.pop(index)
                         if x == cache_var:
                             break
@@ -277,7 +281,9 @@ def _get_cfgs(input_program):
     return cfgs


-def memory_optimize(input_program):
+def memory_optimize(input_program, print_log=False):
+    global PRINT_LOG
+    PRINT_LOG = print_log
     cfgs = _get_cfgs(input_program)
     for cfg in cfgs:
         cfg.memory_optimize()
@@ -49,7 +49,7 @@ avg_cost = fluid.layers.mean(x=cost)

 sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.01)
 sgd_optimizer.minimize(avg_cost)

-fluid.memory_optimize(fluid.default_main_program())
+fluid.memory_optimize(fluid.default_main_program(), print_log=True)

 BATCH_SIZE = 200
......
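A hedged, end-to-end sketch of how the new print_log switch would be exercised from user code. It assumes the Fluid Python API of this period (fluid.layers.data/fc/square_error_cost/mean, fluid.optimizer.SGD, fluid.Executor, and direct numpy feeds to Executor.run); the tiny network and random batch are illustrative only and not part of this commit:

import numpy as np
import paddle.fluid as fluid

# Build a tiny regression network (illustrative).
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(x=cost)

sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.01)
sgd_optimizer.minimize(avg_cost)

# print_log=True sets the transpiler's module-level PRINT_LOG flag, so the
# "Hit Cache" messages are printed while variables are being reused.
fluid.memory_optimize(fluid.default_main_program(), print_log=True)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

# One illustrative batch of random data.
x_data = np.random.random((8, 13)).astype('float32')
y_data = np.random.random((8, 1)).astype('float32')
exe.run(fluid.default_main_program(),
        feed={'x': x_data, 'y': y_data},
        fetch_list=[avg_cost])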