Commit 1f12ba61 authored by peizhilin

GPU support, fix build issues:

1. Non-UTF-8 characters in the comments of some OPs may cause protobuf to fail in parse_from_string.
2. Comment out some ops that are not supported on Windows.
3. CUDA libraries may not be correctly linked to the target on Windows.
Parent 71d7980f
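All of the Python-side changes below follow one pattern: imports, layer definitions, and environment flags that rely on ops not yet available on Windows are wrapped in an os.name != 'nt' guard instead of being registered unconditionally. A minimal sketch of that guard follows, using simplified placeholder names rather than the exact Paddle symbols:

import os

# Ops that build on every platform are always exported.
__all__ = ['fc', 'embedding']

if os.name != 'nt':
    # Ops whose kernels do not yet build on Windows are defined and
    # exported only on non-Windows platforms.
    __all__.append('dynamic_lstm')

    def dynamic_lstm(input, size):
        """Placeholder body for the sketch; the real layer wraps a C++ op."""
        raise NotImplementedError

The CMake-side changes apply the same idea at link time: when WITH_GPU is on and WITH_DSO is off, the CUDA static libraries are recorded in a global CUDA_MODULES property and explicitly linked into the Windows targets (paddle_fluid_origin, paddle_fluid, paddle_pybind).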
@@ -157,6 +157,9 @@ list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
if(NOT WITH_DSO)
  # TODO(panyx0718): CUPTI only allows DSO?
  list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUPTI_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
+  if(WIN32)
+    set_property(GLOBAL PROPERTY CUDA_MODULES ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
+  endif(WIN32)
endif(NOT WITH_DSO)
# setting nvcc arch flags
...
@@ -13,10 +13,14 @@ cc_library(paddle_fluid_api
    DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
+get_property(cuda_modules GLOBAL PROPERTY CUDA_MODULES)
# paddle_fluid_origin exclude inference api interface
if(WIN32)
  sep_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
+  if(WITH_GPU AND NOT WITH_DSO)
+    target_link_libraries(paddle_fluid_origin ${cuda_modules})
+  endif(WITH_GPU AND NOT WITH_DSO)
else(WIN32)
  cc_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
endif(WIN32)
@@ -36,6 +40,9 @@ endif()
# Create static library
if(WIN32)
  sep_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array)
+  if(WITH_GPU AND NOT WITH_DSO)
+    target_link_libraries(paddle_fluid ${cuda_modules})
+  endif(WITH_GPU AND NOT WITH_DSO)
else(WIN32)
  cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array)
endif(WIN32)
@@ -50,6 +57,9 @@ endif()
if(WIN32)
  sep_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
      DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array)
+  if(WITH_GPU AND NOT WITH_DSO)
+    target_link_libraries(paddle_fluid_origin ${cuda_modules})
+  endif(WITH_GPU AND NOT WITH_DSO)
else(WIN32)
  cc_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
      DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array)
...
@@ -74,7 +74,7 @@ PadConstantLikeOp Operator.
Pad input(Y) with a pad_value, the number of values padded to the edges of each
axis is specified by the difference of the shape of X and Y.
-((0, shape_x_0 - shape_y_0), (0, shape_x_n - shape_y_n)) unique pad widths for
+((0, shape_x_0 - shape_y_0), ... (0, shape_x_n - shape_y_n)) unique pad widths for
each axis.
The input should be a k-D tensor(k > 0 and k < 7). As an example:
...
@@ -122,7 +122,7 @@ class ROIPoolOpMaker : public framework::OpProtoAndCheckerMaker {
              "(Tensor), "
              "Argmaxes corresponding to indices in X used "
              "for gradient computation. Only output "
-             "if arg “is_test” is false.")
+             "if arg \"is_test\" is false.")
        .AsIntermediate();
    AddAttr<float>("spatial_scale",
                   "(float, default 1.0), "
...
@@ -57,8 +57,8 @@ class Unpool2dOpMaker : public framework::OpProtoAndCheckerMaker {
Input shape is: $(N, C_{in}, H_{in}, W_{in})$, Output shape is:
$(N, C_{out}, H_{out}, W_{out})$, where
$$
-H_{out} = (H_{in}−1) * strides[0] − 2 * paddings[0] + ksize[0] \\
-W_{out} = (W_{in}−1) * strides[1] − 2 * paddings[1] + ksize[1]
+H_{out} = (H_{in}-1) * strides[0] - 2 * paddings[0] + ksize[0] \\
+W_{out} = (W_{in}-1) * strides[1] - 2 * paddings[1] + ksize[1]
$$
Paper: http://www.matthewzeiler.com/wp-content/uploads/2017/07/iccv2011.pdf
)DOC");
...
@@ -22,6 +22,10 @@ if(WITH_PYTHON)
endif(WITH_AMD_GPU)
if(WIN32)
+  if(WITH_GPU AND NOT WITH_DSO)
+    get_property(cuda_modules GLOBAL PROPERTY CUDA_MODULES)
+    target_link_libraries(paddle_pybind ${cuda_modules})
+  endif(WITH_GPU AND NOT WITH_DSO)
  target_link_libraries(paddle_pybind shlwapi)
endif(WIN32)
...
@@ -61,12 +61,13 @@ IF(WIN32)
add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
  # COMMAND ${CMAKE_COMMAND} -E touch stub.cc
  COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle
+  COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/libs
  COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python/paddle
  COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_SOURCE_DIR}/paddle/py_paddle ${PADDLE_BINARY_DIR}/python/
  COMMAND ${CMAKE_COMMAND} -E env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
  COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
-  COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python
-  COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_PYTHON_BUILD_DIR}/libs ${PADDLE_PYTHON_BUILD_DIR}/lib-python
+  # COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/libs
+  # COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_PYTHON_BUILD_DIR}/libs ${PADDLE_PYTHON_BUILD_DIR}/libs
  DEPENDS gen_proto_py copy_paddle_pybind ${FLUID_CORE} framework_py_proto profiler_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER})
ELSE(WIN32)
add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
...
@@ -13,6 +13,7 @@
# limitations under the License.
from __future__ import print_function
+import os
# import all class inside framework into fluid module
from . import framework
from .framework import *
@@ -43,16 +44,17 @@ from .lod_tensor import create_lod_tensor, create_random_int_lodtensor
from . import clip
from . import profiler
from . import unique_name
-from . import recordio_writer
-from . import parallel_executor
-from .parallel_executor import *
+if os.name != 'nt':
+    from . import recordio_writer
+    from . import parallel_executor
+    from .parallel_executor import *
from paddle.fluid.layers.math_op_patch import monkey_patch_variable
Tensor = LoDTensor
__all__ = framework.__all__ + executor.__all__ + \
    trainer.__all__ + inferencer.__all__ + transpiler.__all__ + \
-    parallel_executor.__all__ + lod_tensor.__all__ + [
+    lod_tensor.__all__ + [
        'io',
        'initializer',
        'layers',
@@ -78,7 +80,8 @@ __all__ = framework.__all__ + executor.__all__ + \
        'recordio_writer',
        'Scope',
    ]
+if os.name != 'nt':
+    __all__ += parallel_executor.__all__
def __bootstrap__():
    """
@@ -110,12 +113,16 @@ def __bootstrap__():
    os.environ['OMP_NUM_THREADS'] = str(num_threads)
    read_env_flags = [
-        'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir',
+        'use_pinned_memory', 'check_nan_inf', 'benchmark',
        'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb',
        'init_allocated_mem', 'free_idle_memory', 'paddle_num_threads',
-        'dist_threadpool_size', 'cpu_deterministic', 'eager_delete_tensor_gb',
+        'dist_threadpool_size', 'eager_delete_tensor_gb',
        'reader_queue_speed_test_mode'
    ]
+    if os.name != 'nt':
+        read_env_flags.append('warpctc_dir')
+        read_env_flags.append('cpu_deterministic')
    if core.is_compiled_with_dist():
        read_env_flags.append('rpc_deadline')
        read_env_flags.append('rpc_server_profile_period')
...
@@ -15,13 +15,15 @@
from __future__ import print_function
import contextlib
+import os
from .. import core
from .. import executor
from .. import framework
from .. import io
-from .. import parallel_executor
+if os.name != 'nt':
+    from .. import parallel_executor
from .. import unique_name
from .trainer import check_and_get_place
...
@@ -28,7 +28,8 @@ from .. import framework
from .. import io
# optimizer is same as the parameter of Trainer.__init__. Rename it to opt_module
from .. import optimizer as opt_module
-from .. import parallel_executor
+if os.name != 'nt':
+    from .. import parallel_executor
from ..transpiler import distribute_transpiler
__all__ = [
...
@@ -536,7 +536,7 @@ class Operator(object):
    OP_WITHOUT_KERNEL_SET = {
        'feed', 'fetch', 'save', 'load', 'recurrent', 'go',
        'rnn_memory_helper_grad', 'conditional_block', 'while', 'send', 'recv',
-        'listen_and_serv', 'parallel_do', 'save_combine', 'load_combine',
+        'listen_and_serv', 'parallel_do', 'save_combine', 'loadload_combine',
        'ncclInit', 'select', 'checkpoint_notify', 'gen_nccl_id'
    }
...
@@ -15,6 +15,7 @@
from __future__ import print_function
import contextlib
import multiprocessing
+import os
import six
import threading
@@ -344,8 +345,9 @@ def _copy_reader_create_op_(block, op):
    return new_op
-@templatedoc(op_type='create_recordio_file_reader')
-def open_recordio_file(filename,
+if os.name != 'nt':
+    @templatedoc(op_type='create_recordio_file_reader')
+    def open_recordio_file(filename,
                           shapes,
                           lod_levels,
                           dtypes,
...
@@ -18,6 +18,7 @@ All layers just related to the neural network.
from __future__ import print_function
import numpy as np
+import os
from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant
from ..framework import Variable, OpProtoHolder
@@ -31,12 +32,10 @@ from functools import reduce
__all__ = [
    'fc',
    'embedding',
-    'dynamic_lstm',
    'dynamic_lstmp',
    'dynamic_gru',
    'gru_unit',
    'linear_chain_crf',
-    'crf_decoding',
    'cos_sim',
    'cross_entropy',
    'square_error_cost',
@@ -95,7 +94,6 @@ __all__ = [
    'pad',
    'pad_constant_like',
    'label_smooth',
-    'roi_pool',
    'roi_align',
    'dice_loss',
    'image_resize',
@@ -160,6 +158,10 @@ __all__ = [
    'log_loss',
    'add_position_encoding',
]
+if os.name != 'nt':
+    __all__.append('dynamic_lstm')
+    __all__.append('crf_decoding')
+    __all__.append('roi_pool')
def fc(input,
@@ -334,8 +336,9 @@ def embedding(input,
    return tmp
-@templatedoc(op_type="lstm")
-def dynamic_lstm(input,
+if os.name != 'nt':
+    @templatedoc(op_type="lstm")
+    def dynamic_lstm(input,
                     size,
                     h_0=None,
                     c_0=None,
@@ -923,8 +926,9 @@ def linear_chain_crf(input, label, param_attr=None):
    return log_likelihood
-@templatedoc()
-def crf_decoding(input, param_attr, label=None):
+if os.name != 'nt':
+    @templatedoc()
+    def crf_decoding(input, param_attr, label=None):
        """
        ${comment}
@@ -5443,8 +5447,9 @@ def label_smooth(label,
    return smooth_label
-@templatedoc()
-def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0):
+if os.name != 'nt':
+    @templatedoc()
+    def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0):
        """
        ${comment}
...
@@ -13,6 +13,7 @@
# limitations under the License.
from __future__ import print_function
+import os
from .layer_function_generator import generate_layer_fn, generate_layer_fn_noattr
from .. import core
from ..framework import convert_np_dtype_to_dtype_
@@ -99,12 +100,13 @@ Examples:
    >>> result = fluid.layers.hard_shrink(x=data, threshold=0.3)
"""
-__all__ += ['cumsum']
+if os.name != 'nt':
+    __all__ += ['cumsum']
_cum_sum_ = generate_layer_fn('cumsum')
def cumsum(x, axis=None, exclusive=None, reverse=None):
    locals_var = locals().keys()
    kwargs = dict()
    for name in locals_var:
@@ -114,12 +116,12 @@ def cumsum(x, axis=None, exclusive=None, reverse=None):
    return _cum_sum_(**kwargs)
cumsum.__doc__ = _cum_sum_.__doc__ + """
Examples:
    >>> data = fluid.layers.data(name="input", shape=[32, 784])
    >>> result = fluid.layers.cumsum(data, axis=0)
"""
__all__ += ['thresholded_relu']
...
@@ -180,7 +180,8 @@ if '${CMAKE_BUILD_TYPE}' == 'Release':
        package_data['paddle.libs']+=['libmkldnn.so.0']
        shutil.copy('${MKLDNN_SHARED_LIB}', libs_path)
# remove unused paddle/libs/__init__.py
-os.remove(libs_path+'/__init__.py')
+if os.path.isfile(libs_path+'/__init__.py'):
+    os.remove(libs_path+'/__init__.py')
package_dir['paddle.libs']=libs_path
# change rpath of core.so, add $ORIGIN/../libs/ to it.
...