提交 1f12ba61 编写于 作者: P peizhilin

gpu support, fix build issue:

1. Non-UTF-8 characters within the comments of OPs may cause protobuf to fail in parse_from_string
2. comment out some ops which are not supported on Windows
3. CUDA libs may not be correctly linked to the target on Windows
上级 71d7980f
...@@ -157,6 +157,9 @@ list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY}) ...@@ -157,6 +157,9 @@ list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
if(NOT WITH_DSO) if(NOT WITH_DSO)
# TODO(panyx0718): CUPTI only allows DSO? # TODO(panyx0718): CUPTI only allows DSO?
list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUPTI_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY}) list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUPTI_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
if(WIN32)
set_property(GLOBAL PROPERTY CUDA_MODULES ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
endif(WIN32)
endif(NOT WITH_DSO) endif(NOT WITH_DSO)
# setting nvcc arch flags # setting nvcc arch flags
......
...@@ -13,10 +13,14 @@ cc_library(paddle_fluid_api ...@@ -13,10 +13,14 @@ cc_library(paddle_fluid_api
DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB}) DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
get_property(cuda_modules GLOBAL PROPERTY CUDA_MODULES)
# paddle_fluid_origin exclude inference api interface # paddle_fluid_origin exclude inference api interface
if(WIN32) if(WIN32)
sep_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api) sep_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
if(WITH_GPU AND NOT WITH_DSO)
target_link_libraries(paddle_fluid_origin ${cuda_modules})
endif(WITH_GPU AND NOT WITH_DSO)
else(WIN32) else(WIN32)
cc_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api) cc_library(paddle_fluid_origin DEPS ${fluid_modules} paddle_fluid_api)
endif(WIN32) endif(WIN32)
...@@ -36,6 +40,9 @@ endif() ...@@ -36,6 +40,9 @@ endif()
# Create static library # Create static library
if(WIN32) if(WIN32)
sep_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array) sep_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array)
if(WITH_GPU AND NOT WITH_DSO)
target_link_libraries(paddle_fluid ${cuda_modules})
endif(WITH_GPU AND NOT WITH_DSO)
else(WIN32) else(WIN32)
cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array) cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array)
endif(WIN32) endif(WIN32)
...@@ -50,6 +57,9 @@ endif() ...@@ -50,6 +57,9 @@ endif()
if(WIN32) if(WIN32)
sep_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS} sep_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array) DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array)
if(WITH_GPU AND NOT WITH_DSO)
target_link_libraries(paddle_fluid_shared ${cuda_modules})
endif(WITH_GPU AND NOT WITH_DSO)
else(WIN32) else(WIN32)
cc_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS} cc_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS}
DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array) DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array)
......
...@@ -74,7 +74,7 @@ PadConstantLikeOp Operator. ...@@ -74,7 +74,7 @@ PadConstantLikeOp Operator.
Pad input(Y) with a pad_value, the number of values padded to the edges of each Pad input(Y) with a pad_value, the number of values padded to the edges of each
axis is specified by the difference of the shape of X and Y. axis is specified by the difference of the shape of X and Y.
((0, shape_x_0 - shape_y_0), (0, shape_x_n - shape_y_n)) unique pad widths for ((0, shape_x_0 - shape_y_0), ... (0, shape_x_n - shape_y_n)) unique pad widths for
each axis. each axis.
The input should be a k-D tensor(k > 0 and k < 7). As an example: The input should be a k-D tensor(k > 0 and k < 7). As an example:
......
...@@ -122,7 +122,7 @@ class ROIPoolOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -122,7 +122,7 @@ class ROIPoolOpMaker : public framework::OpProtoAndCheckerMaker {
"(Tensor), " "(Tensor), "
"Argmaxes corresponding to indices in X used " "Argmaxes corresponding to indices in X used "
"for gradient computation. Only output " "for gradient computation. Only output "
"if arg “is_test” is false.") "if arg \"is_test\" is false.")
.AsIntermediate(); .AsIntermediate();
AddAttr<float>("spatial_scale", AddAttr<float>("spatial_scale",
"(float, default 1.0), " "(float, default 1.0), "
......
...@@ -57,8 +57,8 @@ class Unpool2dOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -57,8 +57,8 @@ class Unpool2dOpMaker : public framework::OpProtoAndCheckerMaker {
Input shape is: $(N, C_{in}, H_{in}, W_{in})$, Output shape is: Input shape is: $(N, C_{in}, H_{in}, W_{in})$, Output shape is:
$(N, C_{out}, H_{out}, W_{out})$, where $(N, C_{out}, H_{out}, W_{out})$, where
$$ $$
H_{out} = (H_{in}−1) * strides[0] − 2 * paddings[0] + ksize[0] \\ H_{out} = (H_{in}-1) * strides[0] - 2 * paddings[0] + ksize[0] \\
W_{out} = (W_{in}−1) * strides[1] − 2 * paddings[1] + ksize[1] W_{out} = (W_{in}-1) * strides[1] - 2 * paddings[1] + ksize[1]
$$ $$
Paper: http://www.matthewzeiler.com/wp-content/uploads/2017/07/iccv2011.pdf Paper: http://www.matthewzeiler.com/wp-content/uploads/2017/07/iccv2011.pdf
)DOC"); )DOC");
......
...@@ -22,6 +22,10 @@ if(WITH_PYTHON) ...@@ -22,6 +22,10 @@ if(WITH_PYTHON)
endif(WITH_AMD_GPU) endif(WITH_AMD_GPU)
if(WIN32) if(WIN32)
if(WITH_GPU AND NOT WITH_DSO)
get_property(cuda_modules GLOBAL PROPERTY CUDA_MODULES)
target_link_libraries(paddle_pybind ${cuda_modules})
endif(WITH_GPU AND NOT WITH_DSO)
target_link_libraries(paddle_pybind shlwapi) target_link_libraries(paddle_pybind shlwapi)
endif(WIN32) endif(WIN32)
......
...@@ -61,12 +61,13 @@ IF(WIN32) ...@@ -61,12 +61,13 @@ IF(WIN32)
add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
# COMMAND ${CMAKE_COMMAND} -E touch stub.cc # COMMAND ${CMAKE_COMMAND} -E touch stub.cc
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/libs
COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python/paddle COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python/paddle
COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_SOURCE_DIR}/paddle/py_paddle ${PADDLE_BINARY_DIR}/python/ COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_SOURCE_DIR}/paddle/py_paddle ${PADDLE_BINARY_DIR}/python/
COMMAND ${CMAKE_COMMAND} -E env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND ${CMAKE_COMMAND} -E env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python # COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/libs
COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_PYTHON_BUILD_DIR}/libs ${PADDLE_PYTHON_BUILD_DIR}/lib-python # COMMAND ${CMAKE_COMMAND} -E copy ${PADDLE_PYTHON_BUILD_DIR}/libs ${PADDLE_PYTHON_BUILD_DIR}/libs
DEPENDS gen_proto_py copy_paddle_pybind ${FLUID_CORE} framework_py_proto profiler_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER}) DEPENDS gen_proto_py copy_paddle_pybind ${FLUID_CORE} framework_py_proto profiler_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER})
ELSE(WIN32) ELSE(WIN32)
add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
import os
# import all class inside framework into fluid module # import all class inside framework into fluid module
from . import framework from . import framework
from .framework import * from .framework import *
...@@ -43,16 +44,17 @@ from .lod_tensor import create_lod_tensor, create_random_int_lodtensor ...@@ -43,16 +44,17 @@ from .lod_tensor import create_lod_tensor, create_random_int_lodtensor
from . import clip from . import clip
from . import profiler from . import profiler
from . import unique_name from . import unique_name
from . import recordio_writer if os.name != 'nt':
from . import parallel_executor from . import recordio_writer
from .parallel_executor import * from . import parallel_executor
from .parallel_executor import *
from paddle.fluid.layers.math_op_patch import monkey_patch_variable from paddle.fluid.layers.math_op_patch import monkey_patch_variable
Tensor = LoDTensor Tensor = LoDTensor
__all__ = framework.__all__ + executor.__all__ + \ __all__ = framework.__all__ + executor.__all__ + \
trainer.__all__ + inferencer.__all__ + transpiler.__all__ + \ trainer.__all__ + inferencer.__all__ + transpiler.__all__ + \
parallel_executor.__all__ + lod_tensor.__all__ + [ lod_tensor.__all__ + [
'io', 'io',
'initializer', 'initializer',
'layers', 'layers',
...@@ -78,7 +80,8 @@ __all__ = framework.__all__ + executor.__all__ + \ ...@@ -78,7 +80,8 @@ __all__ = framework.__all__ + executor.__all__ + \
'recordio_writer', 'recordio_writer',
'Scope', 'Scope',
] ]
if os.name != 'nt':
__all__ += parallel_executor.__all__
def __bootstrap__(): def __bootstrap__():
""" """
...@@ -110,12 +113,16 @@ def __bootstrap__(): ...@@ -110,12 +113,16 @@ def __bootstrap__():
os.environ['OMP_NUM_THREADS'] = str(num_threads) os.environ['OMP_NUM_THREADS'] = str(num_threads)
read_env_flags = [ read_env_flags = [
'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir', 'use_pinned_memory', 'check_nan_inf', 'benchmark',
'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb', 'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb',
'init_allocated_mem', 'free_idle_memory', 'paddle_num_threads', 'init_allocated_mem', 'free_idle_memory', 'paddle_num_threads',
'dist_threadpool_size', 'cpu_deterministic', 'eager_delete_tensor_gb', 'dist_threadpool_size', 'eager_delete_tensor_gb',
'reader_queue_speed_test_mode' 'reader_queue_speed_test_mode'
] ]
if os.name != 'nt':
read_env_flags.append('warpctc_dir')
read_env_flags.append('cpu_deterministic')
if core.is_compiled_with_dist(): if core.is_compiled_with_dist():
read_env_flags.append('rpc_deadline') read_env_flags.append('rpc_deadline')
read_env_flags.append('rpc_server_profile_period') read_env_flags.append('rpc_server_profile_period')
......
...@@ -15,13 +15,15 @@ ...@@ -15,13 +15,15 @@
from __future__ import print_function from __future__ import print_function
import contextlib import contextlib
import os
from .. import core from .. import core
from .. import executor from .. import executor
from .. import framework from .. import framework
from .. import io from .. import io
from .. import parallel_executor if os.name != 'nt':
from .. import parallel_executor
from .. import unique_name from .. import unique_name
from .trainer import check_and_get_place from .trainer import check_and_get_place
......
...@@ -28,7 +28,8 @@ from .. import framework ...@@ -28,7 +28,8 @@ from .. import framework
from .. import io from .. import io
# optimizer is same as the parameter of Trainer.__init__. Rename it to opt_module # optimizer is same as the parameter of Trainer.__init__. Rename it to opt_module
from .. import optimizer as opt_module from .. import optimizer as opt_module
from .. import parallel_executor if os.name != 'nt':
from .. import parallel_executor
from ..transpiler import distribute_transpiler from ..transpiler import distribute_transpiler
__all__ = [ __all__ = [
......
...@@ -536,7 +536,7 @@ class Operator(object): ...@@ -536,7 +536,7 @@ class Operator(object):
OP_WITHOUT_KERNEL_SET = { OP_WITHOUT_KERNEL_SET = {
'feed', 'fetch', 'save', 'load', 'recurrent', 'go', 'feed', 'fetch', 'save', 'load', 'recurrent', 'go',
'rnn_memory_helper_grad', 'conditional_block', 'while', 'send', 'recv', 'rnn_memory_helper_grad', 'conditional_block', 'while', 'send', 'recv',
'listen_and_serv', 'parallel_do', 'save_combine', 'load_combine', 'listen_and_serv', 'parallel_do', 'save_combine', 'load_combine',
'ncclInit', 'select', 'checkpoint_notify', 'gen_nccl_id' 'ncclInit', 'select', 'checkpoint_notify', 'gen_nccl_id'
} }
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
from __future__ import print_function from __future__ import print_function
import contextlib import contextlib
import multiprocessing import multiprocessing
import os
import six import six
import threading import threading
...@@ -344,70 +345,71 @@ def _copy_reader_create_op_(block, op): ...@@ -344,70 +345,71 @@ def _copy_reader_create_op_(block, op):
return new_op return new_op
@templatedoc(op_type='create_recordio_file_reader') if os.name != 'nt':
def open_recordio_file(filename, @templatedoc(op_type='create_recordio_file_reader')
shapes, def open_recordio_file(filename,
lod_levels, shapes,
dtypes, lod_levels,
pass_num=1, dtypes,
for_parallel=True): pass_num=1,
""" for_parallel=True):
${comment} """
${comment}
Args:
filename(${filename_type}): ${filename_comment}. Args:
shapes(list): List of tuples which declaring data shapes. filename(${filename_type}): ${filename_comment}.
lod_levels(${lod_levels_type}): ${lod_levels_comment}. shapes(list): List of tuples which declaring data shapes.
dtypes(list): List of strs which declaring data type. lod_levels(${lod_levels_type}): ${lod_levels_comment}.
pass_num(int): Number of passes to run. dtypes(list): List of strs which declaring data type.
for_parallel(Bool): Set it as True if you are going to run pass_num(int): Number of passes to run.
subsequent operators in parallel. for_parallel(Bool): Set it as True if you are going to run
subsequent operators in parallel.
Returns:
${out_comment}. Returns:
${out_comment}.
Examples:
Examples:
>>> import paddle.fluid as fluid
>>> reader = fluid.layers.io.open_recordio_file( >>> import paddle.fluid as fluid
>>> filename='./data.recordio', >>> reader = fluid.layers.io.open_recordio_file(
>>> shapes=[(3,224,224), (1)], >>> filename='./data.recordio',
>>> lod_levels=[0, 0], >>> shapes=[(3,224,224), (1)],
>>> dtypes=['float32', 'int64']) >>> lod_levels=[0, 0],
>>> # Via the reader, we can use 'read_file' layer to get data: >>> dtypes=['float32', 'int64'])
>>> image, label = fluid.layers.io.read_file(reader) >>> # Via the reader, we can use 'read_file' layer to get data:
""" >>> image, label = fluid.layers.io.read_file(reader)
dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes] """
shape_concat = [] dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]
ranks = [] shape_concat = []
ranks = []
for shape in shapes:
shape_concat.extend(shape) for shape in shapes:
ranks.append(len(shape)) shape_concat.extend(shape)
ranks.append(len(shape))
var_name = unique_name('open_recordio_file')
var_name = unique_name('open_recordio_file')
startup_blk = default_startup_program().current_block()
startup_var = startup_blk.create_var(name=var_name) startup_blk = default_startup_program().current_block()
startup_blk.append_op( startup_var = startup_blk.create_var(name=var_name)
type='create_recordio_file_reader', startup_blk.append_op(
outputs={'Out': [startup_var]}, type='create_recordio_file_reader',
attrs={ outputs={'Out': [startup_var]},
'shape_concat': shape_concat, attrs={
'lod_levels': lod_levels, 'shape_concat': shape_concat,
'filename': filename, 'lod_levels': lod_levels,
'ranks': ranks 'filename': filename,
}) 'ranks': ranks
})
startup_var.desc.set_dtypes(dtypes) startup_var.desc.set_dtypes(dtypes)
startup_var.persistable = True startup_var.persistable = True
main_prog_var = _copy_reader_var_(default_main_program().current_block(), main_prog_var = _copy_reader_var_(default_main_program().current_block(),
startup_var) startup_var)
if pass_num > 1: if pass_num > 1:
main_prog_var = multi_pass(reader=main_prog_var, pass_num=pass_num) main_prog_var = multi_pass(reader=main_prog_var, pass_num=pass_num)
return monkey_patch_reader_methods(main_prog_var) return monkey_patch_reader_methods(main_prog_var)
def random_data_generator(low, high, shapes, lod_levels, for_parallel=True): def random_data_generator(low, high, shapes, lod_levels, for_parallel=True):
......
...@@ -18,6 +18,7 @@ All layers just related to the neural network. ...@@ -18,6 +18,7 @@ All layers just related to the neural network.
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
import os
from ..layer_helper import LayerHelper from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant from ..initializer import Normal, Constant
from ..framework import Variable, OpProtoHolder from ..framework import Variable, OpProtoHolder
...@@ -31,12 +32,10 @@ from functools import reduce ...@@ -31,12 +32,10 @@ from functools import reduce
__all__ = [ __all__ = [
'fc', 'fc',
'embedding', 'embedding',
'dynamic_lstm',
'dynamic_lstmp', 'dynamic_lstmp',
'dynamic_gru', 'dynamic_gru',
'gru_unit', 'gru_unit',
'linear_chain_crf', 'linear_chain_crf',
'crf_decoding',
'cos_sim', 'cos_sim',
'cross_entropy', 'cross_entropy',
'square_error_cost', 'square_error_cost',
...@@ -95,7 +94,6 @@ __all__ = [ ...@@ -95,7 +94,6 @@ __all__ = [
'pad', 'pad',
'pad_constant_like', 'pad_constant_like',
'label_smooth', 'label_smooth',
'roi_pool',
'roi_align', 'roi_align',
'dice_loss', 'dice_loss',
'image_resize', 'image_resize',
...@@ -160,6 +158,10 @@ __all__ = [ ...@@ -160,6 +158,10 @@ __all__ = [
'log_loss', 'log_loss',
'add_position_encoding', 'add_position_encoding',
] ]
if os.name != 'nt':
__all__.append('dynamic_lstm')
__all__.append('crf_decoding')
__all__.append('roi_pool')
def fc(input, def fc(input,
...@@ -334,126 +336,127 @@ def embedding(input, ...@@ -334,126 +336,127 @@ def embedding(input,
return tmp return tmp
@templatedoc(op_type="lstm") if os.name != 'nt':
def dynamic_lstm(input, @templatedoc(op_type="lstm")
size, def dynamic_lstm(input,
h_0=None, size,
c_0=None, h_0=None,
param_attr=None, c_0=None,
bias_attr=None, param_attr=None,
use_peepholes=True, bias_attr=None,
is_reverse=False, use_peepholes=True,
gate_activation='sigmoid', is_reverse=False,
cell_activation='tanh', gate_activation='sigmoid',
candidate_activation='tanh', cell_activation='tanh',
dtype='float32', candidate_activation='tanh',
name=None): dtype='float32',
""" name=None):
${comment} """
${comment}
Args:
input (Variable): ${input_comment} Args:
size (int): 4 * hidden size. input (Variable): ${input_comment}
h_0(Variable): The initial hidden state is an optional input, default is zero. size (int): 4 * hidden size.
This is a tensor with shape (N x D), where N is the h_0(Variable): The initial hidden state is an optional input, default is zero.
batch size and D is the hidden size. This is a tensor with shape (N x D), where N is the
c_0(Variable): The initial cell state is an optional input, default is zero. batch size and D is the hidden size.
This is a tensor with shape (N x D), where N is the c_0(Variable): The initial cell state is an optional input, default is zero.
batch size. `h_0` and `c_0` can be NULL but only at the same time. This is a tensor with shape (N x D), where N is the
param_attr(ParamAttr|None): The parameter attribute for the learnable batch size. `h_0` and `c_0` can be NULL but only at the same time.
hidden-hidden weights. param_attr(ParamAttr|None): The parameter attribute for the learnable
hidden-hidden weights.
- Weights = {:math:`W_{ch}, W_{ih}, \
W_{fh}, W_{oh}`} - Weights = {:math:`W_{ch}, W_{ih}, \
- The shape is (D x 4D), where D is the hidden W_{fh}, W_{oh}`}
size. - The shape is (D x 4D), where D is the hidden
size.
If it is set to None or one attribute of ParamAttr,
dynamic_lstm will create ParamAttr as param_attr. If it is set to None or one attribute of ParamAttr,
If the Initializer of the param_attr is not set, the dynamic_lstm will create ParamAttr as param_attr.
parameter is initialized with Xavier. Default: None. If the Initializer of the param_attr is not set, the
bias_attr (ParamAttr|None): The bias attribute for the learnable bias parameter is initialized with Xavier. Default: None.
weights, which contains two parts, input-hidden bias_attr (ParamAttr|None): The bias attribute for the learnable bias
bias weights and peephole connections weights if weights, which contains two parts, input-hidden
setting `use_peepholes` to `True`. bias weights and peephole connections weights if
setting `use_peepholes` to `True`.
1. `use_peepholes = False`
- Biases = {:math:`b_c, b_i, b_f, b_o`}. 1. `use_peepholes = False`
- The shape is (1 x 4D). - Biases = {:math:`b_c, b_i, b_f, b_o`}.
2. `use_peepholes = True` - The shape is (1 x 4D).
- Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \ 2. `use_peepholes = True`
W_{fc}, W_{oc}`}. - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \
- The shape is (1 x 7D). W_{fc}, W_{oc}`}.
- The shape is (1 x 7D).
If it is set to None or one attribute of ParamAttr,
dynamic_lstm will create ParamAttr as bias_attr. If it is set to None or one attribute of ParamAttr,
If the Initializer of the bias_attr is not set, dynamic_lstm will create ParamAttr as bias_attr.
the bias is initialized zero. Default: None. If the Initializer of the bias_attr is not set,
use_peepholes (bool): ${use_peepholes_comment} the bias is initialized zero. Default: None.
is_reverse (bool): ${is_reverse_comment} use_peepholes (bool): ${use_peepholes_comment}
gate_activation (str): ${gate_activation_comment} is_reverse (bool): ${is_reverse_comment}
cell_activation (str): ${cell_activation_comment} gate_activation (str): ${gate_activation_comment}
candidate_activation (str): ${candidate_activation_comment} cell_activation (str): ${cell_activation_comment}
dtype (str): Data type. Choices = ["float32", "float64"], default "float32". candidate_activation (str): ${candidate_activation_comment}
name (str|None): A name for this layer(optional). If set None, the layer dtype (str): Data type. Choices = ["float32", "float64"], default "float32".
will be named automatically. name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
tuple: The hidden state, and cell state of LSTM. The shape of both \ Returns:
is (T x D), and lod is the same with the `input`. tuple: The hidden state, and cell state of LSTM. The shape of both \
is (T x D), and lod is the same with the `input`.
Examples:
.. code-block:: python Examples:
.. code-block:: python
hidden_dim = 512
forward_proj = fluid.layers.fc(input=input_seq, size=hidden_dim * 4, hidden_dim = 512
bias_attr=False) forward_proj = fluid.layers.fc(input=input_seq, size=hidden_dim * 4,
forward, _ = fluid.layers.dynamic_lstm( bias_attr=False)
input=forward_proj, size=hidden_dim * 4, use_peepholes=False) forward, _ = fluid.layers.dynamic_lstm(
""" input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
assert bias_attr is not False, "bias_attr should not be False in dynamic_lstmp." """
helper = LayerHelper('lstm', **locals()) assert bias_attr is not False, "bias_attr should not be False in dynamic_lstmp."
size = size // 4 helper = LayerHelper('lstm', **locals())
weight = helper.create_parameter( size = size // 4
attr=helper.param_attr, shape=[size, 4 * size], dtype=dtype) weight = helper.create_parameter(
bias_size = [1, 7 * size] attr=helper.param_attr, shape=[size, 4 * size], dtype=dtype)
if not use_peepholes: bias_size = [1, 7 * size]
bias_size[1] = 4 * size if not use_peepholes:
bias = helper.create_parameter( bias_size[1] = 4 * size
attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True) bias = helper.create_parameter(
attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True)
hidden = helper.create_variable_for_type_inference(dtype) hidden = helper.create_variable_for_type_inference(dtype)
cell = helper.create_variable_for_type_inference(dtype) cell = helper.create_variable_for_type_inference(dtype)
batch_gate = helper.create_variable_for_type_inference(dtype) batch_gate = helper.create_variable_for_type_inference(dtype)
batch_cell_pre_act = helper.create_variable_for_type_inference(dtype) batch_cell_pre_act = helper.create_variable_for_type_inference(dtype)
inputs = {'Input': input, 'Weight': weight, 'Bias': bias} inputs = {'Input': input, 'Weight': weight, 'Bias': bias}
batch_size = input.shape[0] batch_size = input.shape[0]
if h_0: if h_0:
assert h_0.shape == (batch_size, size), \ assert h_0.shape == (batch_size, size), \
'The shape of h0 should be (batch_size, %d)' % size 'The shape of h0 should be (batch_size, %d)' % size
inputs['H0'] = h_0 inputs['H0'] = h_0
if c_0: if c_0:
assert c_0.shape == (batch_size, size), \ assert c_0.shape == (batch_size, size), \
'The shape of c0 should be (batch_size, %d)' % size 'The shape of c0 should be (batch_size, %d)' % size
inputs['C0'] = c_0 inputs['C0'] = c_0
helper.append_op( helper.append_op(
type='lstm', type='lstm',
inputs=inputs, inputs=inputs,
outputs={ outputs={
'Hidden': hidden, 'Hidden': hidden,
'Cell': cell, 'Cell': cell,
'BatchGate': batch_gate, 'BatchGate': batch_gate,
'BatchCellPreAct': batch_cell_pre_act 'BatchCellPreAct': batch_cell_pre_act
}, },
attrs={ attrs={
'use_peepholes': use_peepholes, 'use_peepholes': use_peepholes,
'is_reverse': is_reverse, 'is_reverse': is_reverse,
'gate_activation': gate_activation, 'gate_activation': gate_activation,
'cell_activation': cell_activation, 'cell_activation': cell_activation,
'candidate_activation': candidate_activation 'candidate_activation': candidate_activation
}) })
return hidden, cell return hidden, cell
def dynamic_lstmp(input, def dynamic_lstmp(input,
...@@ -923,39 +926,40 @@ def linear_chain_crf(input, label, param_attr=None): ...@@ -923,39 +926,40 @@ def linear_chain_crf(input, label, param_attr=None):
return log_likelihood return log_likelihood
@templatedoc() if os.name != 'nt':
def crf_decoding(input, param_attr, label=None): @templatedoc()
""" def crf_decoding(input, param_attr, label=None):
${comment} """
${comment}
Args: Args:
input(${emission_type}): ${emission_comment} input(${emission_type}): ${emission_comment}
param_attr(ParamAttr): The parameter attribute for training. param_attr(ParamAttr): The parameter attribute for training.
label(${label_type}): ${label_comment} label(${label_type}): ${label_comment}
Returns: Returns:
Variable: ${viterbi_path_comment} Variable: ${viterbi_path_comment}
Examples: Examples:
.. code-block:: python .. code-block:: python
crf_decode = layers.crf_decoding( crf_decode = layers.crf_decoding(
input=hidden, param_attr=ParamAttr(name="crfw")) input=hidden, param_attr=ParamAttr(name="crfw"))
""" """
helper = LayerHelper('crf_decoding', **locals()) helper = LayerHelper('crf_decoding', **locals())
transition = helper.get_parameter(param_attr.name) transition = helper.get_parameter(param_attr.name)
viterbi_path = helper.create_variable_for_type_inference( viterbi_path = helper.create_variable_for_type_inference(
dtype=helper.input_dtype()) dtype=helper.input_dtype())
helper.append_op( helper.append_op(
type='crf_decoding', type='crf_decoding',
inputs={"Emission": [input], inputs={"Emission": [input],
"Transition": transition, "Transition": transition,
"Label": label}, "Label": label},
outputs={"ViterbiPath": [viterbi_path]}) outputs={"ViterbiPath": [viterbi_path]})
return viterbi_path return viterbi_path
@templatedoc() @templatedoc()
...@@ -5443,42 +5447,43 @@ def label_smooth(label, ...@@ -5443,42 +5447,43 @@ def label_smooth(label,
return smooth_label return smooth_label
@templatedoc() if os.name != 'nt':
def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0): @templatedoc()
""" def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0):
${comment} """
${comment}
Args: Args:
input (Variable): ${x_comment} input (Variable): ${x_comment}
rois (Variable): ROIs (Regions of Interest) to pool over. rois (Variable): ROIs (Regions of Interest) to pool over.
pooled_height (integer): ${pooled_height_comment} Default: 1 pooled_height (integer): ${pooled_height_comment} Default: 1
pooled_width (integer): ${pooled_width_comment} Default: 1 pooled_width (integer): ${pooled_width_comment} Default: 1
spatial_scale (float): ${spatial_scale_comment} Default: 1.0 spatial_scale (float): ${spatial_scale_comment} Default: 1.0
Returns: Returns:
Variable: ${out_comment}. Variable: ${out_comment}.
Examples: Examples:
.. code-block:: python .. code-block:: python
pool_out = fluid.layers.roi_pool(input=x, rois=rois, pooled_height=7, pooled_width=7, spatial_scale=1.0) pool_out = fluid.layers.roi_pool(input=x, rois=rois, pooled_height=7, pooled_width=7, spatial_scale=1.0)
""" """
helper = LayerHelper('roi_pool', **locals()) helper = LayerHelper('roi_pool', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
pool_out = helper.create_variable_for_type_inference(dtype) pool_out = helper.create_variable_for_type_inference(dtype)
argmaxes = helper.create_variable_for_type_inference(dtype='int32') argmaxes = helper.create_variable_for_type_inference(dtype='int32')
helper.append_op( helper.append_op(
type="roi_pool", type="roi_pool",
inputs={"X": input, inputs={"X": input,
"ROIs": rois}, "ROIs": rois},
outputs={"Out": pool_out, outputs={"Out": pool_out,
"Argmax": argmaxes}, "Argmax": argmaxes},
attrs={ attrs={
"pooled_height": pooled_height, "pooled_height": pooled_height,
"pooled_width": pooled_width, "pooled_width": pooled_width,
"spatial_scale": spatial_scale "spatial_scale": spatial_scale
}) })
return pool_out return pool_out
@templatedoc() @templatedoc()
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
import os
from .layer_function_generator import generate_layer_fn, generate_layer_fn_noattr from .layer_function_generator import generate_layer_fn, generate_layer_fn_noattr
from .. import core from .. import core
from ..framework import convert_np_dtype_to_dtype_ from ..framework import convert_np_dtype_to_dtype_
...@@ -99,27 +100,28 @@ Examples: ...@@ -99,27 +100,28 @@ Examples:
>>> result = fluid.layers.hard_shrink(x=data, threshold=0.3) >>> result = fluid.layers.hard_shrink(x=data, threshold=0.3)
""" """
__all__ += ['cumsum'] if os.name != 'nt':
__all__ += ['cumsum']
_cum_sum_ = generate_layer_fn('cumsum') _cum_sum_ = generate_layer_fn('cumsum')
def cumsum(x, axis=None, exclusive=None, reverse=None): def cumsum(x, axis=None, exclusive=None, reverse=None):
locals_var = locals().keys() locals_var = locals().keys()
kwargs = dict() kwargs = dict()
for name in locals_var: for name in locals_var:
val = locals()[name] val = locals()[name]
if val is not None: if val is not None:
kwargs[name] = val kwargs[name] = val
return _cum_sum_(**kwargs) return _cum_sum_(**kwargs)
cumsum.__doc__ = _cum_sum_.__doc__ + """
Examples:
>>> data = fluid.layers.data(name="input", shape=[32, 784]) cumsum.__doc__ = _cum_sum_.__doc__ + """
>>> result = fluid.layers.cumsum(data, axis=0) Examples:
"""
>>> data = fluid.layers.data(name="input", shape=[32, 784])
>>> result = fluid.layers.cumsum(data, axis=0)
"""
__all__ += ['thresholded_relu'] __all__ += ['thresholded_relu']
......
...@@ -180,7 +180,8 @@ if '${CMAKE_BUILD_TYPE}' == 'Release': ...@@ -180,7 +180,8 @@ if '${CMAKE_BUILD_TYPE}' == 'Release':
package_data['paddle.libs']+=['libmkldnn.so.0'] package_data['paddle.libs']+=['libmkldnn.so.0']
shutil.copy('${MKLDNN_SHARED_LIB}', libs_path) shutil.copy('${MKLDNN_SHARED_LIB}', libs_path)
# remove unused paddle/libs/__init__.py # remove unused paddle/libs/__init__.py
os.remove(libs_path+'/__init__.py') if os.path.isfile(libs_path+'/__init__.py'):
os.remove(libs_path+'/__init__.py')
package_dir['paddle.libs']=libs_path package_dir['paddle.libs']=libs_path
# change rpath of core.so, add $ORIGIN/../libs/ to it. # change rpath of core.so, add $ORIGIN/../libs/ to it.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册