From 1a7559718e0983e321ebf821d4dd452d3965cdee Mon Sep 17 00:00:00 2001 From: GaoWei8 <53294385+GaoWei8@users.noreply.github.com> Date: Fri, 18 Sep 2020 11:16:02 +0800 Subject: [PATCH 001/117] fix cudnn dyload (#27308) * fix cudnn dyload error --- paddle/fluid/operators/cudnn_lstm_cache.h | 10 ++++++++++ paddle/fluid/platform/cudnn_helper.h | 2 ++ paddle/fluid/platform/dynload/cudnn.cc | 4 ++++ paddle/fluid/platform/dynload/cudnn.h | 21 +++++++++++++-------- 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/paddle/fluid/operators/cudnn_lstm_cache.h b/paddle/fluid/operators/cudnn_lstm_cache.h index 4b46e2b475e..3181e4b1d99 100644 --- a/paddle/fluid/operators/cudnn_lstm_cache.h +++ b/paddle/fluid/operators/cudnn_lstm_cache.h @@ -54,6 +54,8 @@ class ScopedRNNBase { x_descs_.emplace_back(x_desc_.descriptor(dims_x, strides_x)); y_descs_.emplace_back(y_desc_.descriptor(dims_y, strides_y)); } + +#if CUDNN_VERSION >= 7201 if (!sequence_length.empty()) { x_seq_desc_.descriptor(seq_length_, batch_size_, input_size_, true, sequence_length); @@ -61,6 +63,7 @@ class ScopedRNNBase { hidden_size_ * numDirections, true, sequence_length); } +#endif // ------------------- cudnn hx, hy, cx, cy descriptors---------- std::vector dims_hx = {num_layers_ * numDirections, batch_size_, @@ -96,10 +99,13 @@ class ScopedRNNBase { is_bidirec_ ? 
CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM, cudnn_type)); #endif + +#if CUDNN_VERSION >= 7201 if (!sequence_length.empty()) { PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetRNNPaddingMode( rnn_desc_.desc(), CUDNN_RNN_PADDED_IO_ENABLED)); } +#endif // ------------------- cudnn weights_size --------------------- size_t weights_size_; @@ -125,8 +131,10 @@ class ScopedRNNBase { } cudnnTensorDescriptor_t* x_descs() { return x_descs_.data(); } cudnnTensorDescriptor_t* y_descs() { return y_descs_.data(); } +#if CUDNN_VERSION >= 7201 cudnnRNNDataDescriptor_t x_seq_desc() { return x_seq_desc_.desc(); } cudnnRNNDataDescriptor_t y_seq_desc() { return y_seq_desc_.desc(); } +#endif cudnnTensorDescriptor_t init_h_desc() { return init_h_desc_.desc(); } cudnnTensorDescriptor_t init_c_desc() { return init_c_desc_.desc(); } cudnnTensorDescriptor_t last_h_desc() { return last_h_desc_.desc(); } @@ -151,8 +159,10 @@ class ScopedRNNBase { platform::ScopedTensorDescriptor x_desc_; platform::ScopedTensorDescriptor y_desc_; +#if CUDNN_VERSION >= 7201 platform::ScopedRNNTensorDescriptor x_seq_desc_; platform::ScopedRNNTensorDescriptor y_seq_desc_; +#endif platform::ScopedTensorDescriptor init_h_desc_; platform::ScopedTensorDescriptor init_c_desc_; platform::ScopedTensorDescriptor last_h_desc_; diff --git a/paddle/fluid/platform/cudnn_helper.h b/paddle/fluid/platform/cudnn_helper.h index bb4c2a89f6f..4b9c5c429da 100644 --- a/paddle/fluid/platform/cudnn_helper.h +++ b/paddle/fluid/platform/cudnn_helper.h @@ -294,6 +294,7 @@ class ScopedTensorDescriptor { DISABLE_COPY_AND_ASSIGN(ScopedTensorDescriptor); }; +#if CUDNN_VERSION >= 7201 class ScopedRNNTensorDescriptor { public: ScopedRNNTensorDescriptor() { @@ -337,6 +338,7 @@ class ScopedRNNTensorDescriptor { cudnnRNNDataDescriptor_t desc_; DISABLE_COPY_AND_ASSIGN(ScopedRNNTensorDescriptor); }; +#endif class ScopedDropoutDescriptor { public: diff --git a/paddle/fluid/platform/dynload/cudnn.cc 
b/paddle/fluid/platform/dynload/cudnn.cc index 44a03d6f14a..1166dc5e4ad 100644 --- a/paddle/fluid/platform/dynload/cudnn.cc +++ b/paddle/fluid/platform/dynload/cudnn.cc @@ -46,6 +46,10 @@ CUDNN_DNN_ROUTINE_EACH_R6(DEFINE_WRAP); CUDNN_DNN_ROUTINE_EACH_R7(DEFINE_WRAP); #endif +#ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7 +CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7(DEFINE_WRAP); +#endif + #ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_R7 CUDNN_DNN_ROUTINE_EACH_AFTER_R7(DEFINE_WRAP); #endif diff --git a/paddle/fluid/platform/dynload/cudnn.h b/paddle/fluid/platform/dynload/cudnn.h index 7e85cb57f33..fba41417648 100644 --- a/paddle/fluid/platform/dynload/cudnn.h +++ b/paddle/fluid/platform/dynload/cudnn.h @@ -101,9 +101,6 @@ extern void EnforceCUDNNLoaded(const char* fn_name); __macro(cudnnDropoutGetStatesSize); \ __macro(cudnnSetDropoutDescriptor); \ __macro(cudnnRestoreDropoutDescriptor); \ - __macro(cudnnCreateRNNDataDescriptor); \ - __macro(cudnnDestroyRNNDataDescriptor); \ - __macro(cudnnSetRNNDataDescriptor); \ __macro(cudnnCreateRNNDescriptor); \ __macro(cudnnGetRNNParamsSize); \ __macro(cudnnGetRNNWorkspaceSize); \ @@ -112,11 +109,6 @@ extern void EnforceCUDNNLoaded(const char* fn_name); __macro(cudnnRNNBackwardData); \ __macro(cudnnRNNBackwardWeights); \ __macro(cudnnRNNForwardInference); \ - __macro(cudnnRNNForwardTrainingEx); \ - __macro(cudnnSetRNNPaddingMode); \ - __macro(cudnnRNNBackwardDataEx); \ - __macro(cudnnRNNBackwardWeightsEx); \ - __macro(cudnnRNNForwardInferenceEx); \ __macro(cudnnDestroyDropoutDescriptor); \ __macro(cudnnDestroyRNNDescriptor); \ __macro(cudnnSetTensorNdDescriptorEx); @@ -188,6 +180,19 @@ CUDNN_DNN_ROUTINE_EACH_R6(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) CUDNN_DNN_ROUTINE_EACH_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) #endif +#if CUDNN_VERSION >= 7201 +#define CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7(__macro) \ + __macro(cudnnCreateRNNDataDescriptor); \ + __macro(cudnnDestroyRNNDataDescriptor); \ + __macro(cudnnSetRNNDataDescriptor); \ + __macro(cudnnSetRNNPaddingMode); 
\ + __macro(cudnnRNNForwardTrainingEx); \ + __macro(cudnnRNNBackwardDataEx); \ + __macro(cudnnRNNBackwardWeightsEx); \ + __macro(cudnnRNNForwardInferenceEx); +CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) +#endif + #if CUDNN_VERSION >= 7401 #define CUDNN_DNN_ROUTINE_EACH_AFTER_R7(__macro) \ __macro(cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize); \ -- GitLab From 03b0e0c42b571247e7dc0570eab58596fe601ae3 Mon Sep 17 00:00:00 2001 From: LielinJiang <50691816+LielinJiang@users.noreply.github.com> Date: Fri, 18 Sep 2020 11:21:04 +0800 Subject: [PATCH 002/117] Remove dependences of cv2 (#27286) * rm dependence of cv2 --- python/paddle/utils/__init__.py | 1 + python/paddle/utils/lazy_import.py | 34 +++++++++++ python/paddle/vision/datasets/folder.py | 3 +- python/paddle/vision/transforms/functional.py | 51 +++++++++++----- python/paddle/vision/transforms/transforms.py | 61 +++++++++++++++---- python/requirements.txt | 1 - python/setup.py.in | 3 - 7 files changed, 121 insertions(+), 33 deletions(-) create mode 100644 python/paddle/utils/lazy_import.py diff --git a/python/paddle/utils/__init__.py b/python/paddle/utils/__init__.py index 4a786679727..77f5ef7e966 100644 --- a/python/paddle/utils/__init__.py +++ b/python/paddle/utils/__init__.py @@ -16,6 +16,7 @@ from .profiler import ProfilerOptions from .profiler import Profiler from .profiler import get_profiler from .deprecated import deprecated +from .lazy_import import try_import from ..fluid.framework import unique_name from ..fluid.framework import load_op_library from ..fluid.framework import require_version diff --git a/python/paddle/utils/lazy_import.py b/python/paddle/utils/lazy_import.py new file mode 100644 index 00000000000..69a32b77a8f --- /dev/null +++ b/python/paddle/utils/lazy_import.py @@ -0,0 +1,34 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Lazy imports for heavy dependencies.""" + +import importlib + + +def try_import(module_name): + """Try importing a module, with an informative error message on failure.""" + install_name = module_name + if module_name == 'cv2': + install_name = 'opencv-python' + + try: + mod = importlib.import_module(module_name) + return mod + except ImportError: + err_msg = ( + "Failed importing {}. This likely means that some paddle modules " + "requires additional dependencies that have to be " + "manually installed (usually with `pip install {}`). 
").format( + module_name, install_name) + raise ImportError(err_msg) diff --git a/python/paddle/vision/datasets/folder.py b/python/paddle/vision/datasets/folder.py index 725fd9acafb..8a3053abefc 100644 --- a/python/paddle/vision/datasets/folder.py +++ b/python/paddle/vision/datasets/folder.py @@ -14,9 +14,9 @@ import os import sys -import cv2 from paddle.io import Dataset +from paddle.utils import try_import __all__ = ["DatasetFolder", "ImageFolder"] @@ -191,6 +191,7 @@ IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', def cv2_loader(path): + cv2 = try_import('cv2') return cv2.imread(path) diff --git a/python/paddle/vision/transforms/functional.py b/python/paddle/vision/transforms/functional.py index b5668fa8c7d..acceb111e6f 100644 --- a/python/paddle/vision/transforms/functional.py +++ b/python/paddle/vision/transforms/functional.py @@ -18,10 +18,11 @@ import random import math import functools -import cv2 import numbers import numpy as np +from paddle.utils import try_import + if sys.version_info < (3, 3): Sequence = collections.Sequence Iterable = collections.Iterable @@ -54,8 +55,8 @@ def flip(image, code): Accordding to the code (the type of flip), flip the input image Args: - image: Input image, with (H, W, C) shape - code: Code that indicates the type of flip. + image (np.ndarray): Input image, with (H, W, C) shape + code (int): Code that indicates the type of flip. -1 : Flip horizontally and vertically 0 : Flip vertically 1 : Flip horizontally @@ -77,18 +78,28 @@ def flip(image, code): # flip horizontally F.flip(fake_img, 1) """ + cv2 = try_import('cv2') return cv2.flip(image, flipCode=code) @keepdims -def resize(img, size, interpolation=cv2.INTER_LINEAR): +def resize(img, size, interpolation=1): """ resize the input data to given size Args: - input: Input data, could be image or masks, with (H, W, C) shape - size: Target size of input data, with (height, width) shape. - interpolation: Interpolation method. 
+ input (np.ndarray): Input data, could be image or masks, with (H, W, C) shape + size (int|list|tuple): Target size of input data, with (height, width) shape. + interpolation (int, optional): Interpolation method. + 0 : cv2.INTER_NEAREST + 1 : cv2.INTER_LINEAR + 2 : cv2.INTER_CUBIC + 3 : cv2.INTER_AREA + 4 : cv2.INTER_LANCZOS4 + 5 : cv2.INTER_LINEAR_EXACT + 7 : cv2.INTER_MAX + 8 : cv2.WARP_FILL_OUTLIERS + 16: cv2.WARP_INVERSE_MAP Examples: .. code-block:: python @@ -102,7 +113,7 @@ def resize(img, size, interpolation=cv2.INTER_LINEAR): F.resize(fake_img, (200, 150)) """ - + cv2 = try_import('cv2') if isinstance(interpolation, Sequence): interpolation = random.choice(interpolation) @@ -179,6 +190,8 @@ def pad(img, padding, fill=(0, 0, 0), padding_mode='constant'): assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric'], \ 'Expected padding mode be either constant, edge, reflect or symmetric, but got {}'.format(padding_mode) + cv2 = try_import('cv2') + PAD_MOD = { 'constant': cv2.BORDER_CONSTANT, 'edge': cv2.BORDER_REPLICATE, @@ -214,18 +227,22 @@ def pad(img, padding, fill=(0, 0, 0), padding_mode='constant'): @keepdims -def rotate(img, - angle, - interpolation=cv2.INTER_LINEAR, - expand=False, - center=None): +def rotate(img, angle, interpolation=1, expand=False, center=None): """Rotates the image by angle. Args: img (numpy.ndarray): Image to be rotated. angle (float|int): In degrees clockwise order. - interpolation (int, optional): - interpolation: Interpolation method. + interpolation (int, optional): Interpolation method. Default: 1. + 0 : cv2.INTER_NEAREST + 1 : cv2.INTER_LINEAR + 2 : cv2.INTER_CUBIC + 3 : cv2.INTER_AREA + 4 : cv2.INTER_LANCZOS4 + 5 : cv2.INTER_LINEAR_EXACT + 7 : cv2.INTER_MAX + 8 : cv2.WARP_FILL_OUTLIERS + 16: cv2.WARP_INVERSE_MAP expand (bool|optional): Optional expansion flag. If true, expands the output image to make it large enough to hold the entire rotated image. 
If false or omitted, make the output image the same size as the input image. @@ -250,8 +267,9 @@ def rotate(img, fake_img = rotate(fake_img, 10) print(fake_img.shape) """ - dtype = img.dtype + cv2 = try_import('cv2') + dtype = img.dtype h, w, _ = img.shape point = center or (w / 2, h / 2) M = cv2.getRotationMatrix2D(point, angle=-angle, scale=1) @@ -312,6 +330,7 @@ def to_grayscale(img, num_output_channels=1): fake_img = to_grayscale(fake_img) print(fake_img.shape) """ + cv2 = try_import('cv2') if num_output_channels == 1: img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) diff --git a/python/paddle/vision/transforms/transforms.py b/python/paddle/vision/transforms/transforms.py index 14809e0c1ac..9ea82827176 100644 --- a/python/paddle/vision/transforms/transforms.py +++ b/python/paddle/vision/transforms/transforms.py @@ -17,7 +17,6 @@ from __future__ import division import math import sys import random -import cv2 import numpy as np import numbers @@ -26,6 +25,7 @@ import collections import warnings import traceback +from paddle.utils import try_import from . import functional as F if sys.version_info < (3, 3): @@ -214,7 +214,16 @@ class Resize(object): smaller edge of the image will be matched to this number. i.e, if height > width, then image will be rescaled to (size * height / width, size) - interpolation (int): Interpolation mode of resize. Default: cv2.INTER_LINEAR. + interpolation (int, optional): Interpolation mode of resize. Default: 1. 
+ 0 : cv2.INTER_NEAREST + 1 : cv2.INTER_LINEAR + 2 : cv2.INTER_CUBIC + 3 : cv2.INTER_AREA + 4 : cv2.INTER_LANCZOS4 + 5 : cv2.INTER_LINEAR_EXACT + 7 : cv2.INTER_MAX + 8 : cv2.WARP_FILL_OUTLIERS + 16: cv2.WARP_INVERSE_MAP Examples: @@ -232,7 +241,7 @@ class Resize(object): print(fake_img.shape) """ - def __init__(self, size, interpolation=cv2.INTER_LINEAR): + def __init__(self, size, interpolation=1): assert isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2) self.size = size @@ -252,6 +261,16 @@ class RandomResizedCrop(object): output_size (int|list|tuple): Target size of output image, with (height, width) shape. scale (list|tuple): Range of size of the origin size cropped. Default: (0.08, 1.0) ratio (list|tuple): Range of aspect ratio of the origin aspect ratio cropped. Default: (0.75, 1.33) + interpolation (int, optional): Interpolation mode of resize. Default: 1. + 0 : cv2.INTER_NEAREST + 1 : cv2.INTER_LINEAR + 2 : cv2.INTER_CUBIC + 3 : cv2.INTER_AREA + 4 : cv2.INTER_LANCZOS4 + 5 : cv2.INTER_LINEAR_EXACT + 7 : cv2.INTER_MAX + 8 : cv2.WARP_FILL_OUTLIERS + 16: cv2.WARP_INVERSE_MAP Examples: @@ -273,7 +292,7 @@ class RandomResizedCrop(object): output_size, scale=(0.08, 1.0), ratio=(3. / 4, 4. / 3), - interpolation=cv2.INTER_LINEAR): + interpolation=1): if isinstance(output_size, int): self.output_size = (output_size, output_size) else: @@ -328,7 +347,16 @@ class CenterCropResize(object): Args: size (int|list|tuple): Target size of output image, with (height, width) shape. crop_padding (int): Center crop with the padding. Default: 32. - interpolation (int): Interpolation mode of resize. Default: cv2.INTER_LINEAR. + interpolation (int, optional): Interpolation mode of resize. Default: 1. 
+ 0 : cv2.INTER_NEAREST + 1 : cv2.INTER_LINEAR + 2 : cv2.INTER_CUBIC + 3 : cv2.INTER_AREA + 4 : cv2.INTER_LANCZOS4 + 5 : cv2.INTER_LINEAR_EXACT + 7 : cv2.INTER_MAX + 8 : cv2.WARP_FILL_OUTLIERS + 16: cv2.WARP_INVERSE_MAP Examples: @@ -346,7 +374,7 @@ class CenterCropResize(object): print(fake_img.shape) """ - def __init__(self, size, crop_padding=32, interpolation=cv2.INTER_LINEAR): + def __init__(self, size, crop_padding=32, interpolation=1): if isinstance(size, int): self.size = (size, size) else: @@ -661,6 +689,7 @@ class ContrastTransform(object): if self.value == 0: return img + cv2 = try_import('cv2') dtype = img.dtype img = img.astype(np.float32) alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value) @@ -701,6 +730,8 @@ class SaturationTransform(object): if self.value == 0: return img + cv2 = try_import('cv2') + dtype = img.dtype img = img.astype(np.float32) alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value) @@ -742,6 +773,7 @@ class HueTransform(object): if self.value == 0: return img + cv2 = try_import('cv2') dtype = img.dtype img = img.astype(np.uint8) hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV_FULL) @@ -1036,7 +1068,16 @@ class RandomRotate(object): degrees (sequence or float or int): Range of degrees to select from. If degrees is a number instead of sequence like (min, max), the range of degrees will be (-degrees, +degrees) clockwise order. - interpolation (int|optional): Interpolation mode of resize. Default: cv2.INTER_LINEAR. + interpolation (int, optional): Interpolation mode of resize. Default: 1. + 0 : cv2.INTER_NEAREST + 1 : cv2.INTER_LINEAR + 2 : cv2.INTER_CUBIC + 3 : cv2.INTER_AREA + 4 : cv2.INTER_LANCZOS4 + 5 : cv2.INTER_LINEAR_EXACT + 7 : cv2.INTER_MAX + 8 : cv2.WARP_FILL_OUTLIERS + 16: cv2.WARP_INVERSE_MAP expand (bool|optional): Optional expansion flag. Default: False. If true, expands the output to make it large enough to hold the entire rotated image. 
If false or omitted, make the output image the same size as the input image. @@ -1061,11 +1102,7 @@ class RandomRotate(object): print(fake_img.shape) """ - def __init__(self, - degrees, - interpolation=cv2.INTER_LINEAR, - expand=False, - center=None): + def __init__(self, degrees, interpolation=1, expand=False, center=None): if isinstance(degrees, numbers.Number): if degrees < 0: raise ValueError( diff --git a/python/requirements.txt b/python/requirements.txt index 47888424755..6a88d61a94c 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,4 +1,3 @@ -opencv-python<=4.2.0.32 requests>=2.20.0 numpy>=1.13, <=1.16.4 ; python_version<"3.5" numpy>=1.13 ; python_version>="3.5" diff --git a/python/setup.py.in b/python/setup.py.in index 77316640034..d85a23a5edd 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -237,9 +237,6 @@ if sys.version_info >= (3,7): setup_requires_tmp+=[setup_requires_i] setup_requires = setup_requires_tmp -if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']: - setup_requires+=['opencv-python'] - # the prefix is sys.prefix which should always be usr paddle_bins = '' -- GitLab From 7e6dfcf9b22e650b9c9631c4fc6ec329d59e54b4 Mon Sep 17 00:00:00 2001 From: haozech Date: Fri, 18 Sep 2020 12:30:11 +0800 Subject: [PATCH 003/117] Add 3 pass version check (#27283) --- .../framework/ir/conv_elementwise_add2_act_fuse_pass.cc | 9 ++++++++- .../framework/ir/conv_elementwise_add_act_fuse_pass.cc | 8 ++++++++ .../fluid/framework/ir/conv_elementwise_add_fuse_pass.cc | 9 +++++++-- .../test_conv_elementwise_add2_act_fuse_pass.py | 4 ++++ .../inference/test_conv_elementwise_add_act_fuse_pass.py | 4 ++++ .../ir/inference/test_conv_elementwise_add_fuse_pass.py | 3 +++ 6 files changed, 34 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc index 2627da7dc40..ad6af69ae02 100644 --- 
a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc @@ -11,9 +11,9 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. - #include "paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.h" #include +#include "paddle/fluid/framework/op_version_registry.h" namespace paddle { namespace framework { @@ -116,3 +116,10 @@ void ConvElementwiseAdd2ActFusePass::ApplyImpl(ir::Graph* graph) const { REGISTER_PASS(conv_elementwise_add2_act_fuse_pass, paddle::framework::ir::ConvElementwiseAdd2ActFusePass); +REGISTER_PASS_CAPABILITY(conv_elementwise_add2_act_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("conv2d", 0) + .EQ("elementwise_add", 0) + .EQ("relu", 0) + .EQ("identity", 0)); diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc index 0b454a0407e..c5fa47ec55f 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.h" #include #include "paddle/fluid/framework/ir/graph_viz_pass.h" +#include "paddle/fluid/framework/op_version_registry.h" namespace paddle { namespace framework { @@ -102,3 +103,10 @@ void ConvElementwiseAddActFusePass::ApplyImpl(ir::Graph* graph) const { REGISTER_PASS(conv_elementwise_add_act_fuse_pass, paddle::framework::ir::ConvElementwiseAddActFusePass); +REGISTER_PASS_CAPABILITY(conv_elementwise_add_act_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("conv2d", 0) + .EQ("elementwise_add", 0) + .EQ("relu", 0) + .EQ("identity", 0)); diff --git 
a/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc b/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc index 007770cf57d..38c0b773dde 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include - #include "paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.h" +#include #include "paddle/fluid/framework/ir/graph_viz_pass.h" +#include "paddle/fluid/framework/op_version_registry.h" namespace paddle { namespace framework { @@ -89,3 +89,8 @@ void ConvElementwiseAddFusePass::ApplyImpl(ir::Graph* graph) const { REGISTER_PASS(conv_elementwise_add_fuse_pass, paddle::framework::ir::ConvElementwiseAddFusePass); +REGISTER_PASS_CAPABILITY(conv_elementwise_add_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("conv2d", 0) + .EQ("elementwise_add", 0)); diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add2_act_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add2_act_fuse_pass.py index d6dbd397b90..6907b6a7eb5 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add2_act_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add2_act_fuse_pass.py @@ -19,6 +19,7 @@ import numpy as np from inference_pass_test import InferencePassTest import paddle.fluid as fluid import paddle.fluid.core as core +from paddle.fluid.core import PassVersionChecker from paddle.fluid.core import AnalysisConfig """Test for fusion of conv, elementwise_add and 2 act.""" @@ -46,6 +47,9 @@ class ConvElementwiseAdd2ActFusePassTest(InferencePassTest): if core.is_compiled_with_cuda(): use_gpu = True self.check_output_with_option(use_gpu) + self.assertTrue( + PassVersionChecker.IsCompatible( 
+ 'conv_elementwise_add2_act_fuse_pass')) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add_act_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add_act_fuse_pass.py index 2e9035420d7..6ff60aa6deb 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add_act_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add_act_fuse_pass.py @@ -19,6 +19,7 @@ import numpy as np from inference_pass_test import InferencePassTest import paddle.fluid as fluid import paddle.fluid.core as core +from paddle.fluid.core import PassVersionChecker from paddle.fluid.core import AnalysisConfig """Test for fusion of conv, elementwise_add and act.""" @@ -48,6 +49,9 @@ class ConvElementwiseAddActFusePassTest(InferencePassTest): if core.is_compiled_with_cuda(): use_gpu = True self.check_output_with_option(use_gpu) + self.assertTrue( + PassVersionChecker.IsCompatible( + 'conv_elementwise_add_act_fuse_pass')) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add_fuse_pass.py index 7c4e0d6e76e..96b046edaec 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_elementwise_add_fuse_pass.py @@ -19,6 +19,7 @@ import numpy as np from inference_pass_test import InferencePassTest import paddle.fluid as fluid import paddle.fluid.core as core +from paddle.fluid.core import PassVersionChecker from paddle.fluid.core import AnalysisConfig """Test for fusion of conv and elementwise_add.""" @@ -44,6 +45,8 @@ class ConvElementwiseAddFusePassTest(InferencePassTest): if core.is_compiled_with_cuda(): use_gpu = True self.check_output_with_option(use_gpu) + self.assertTrue( + 
PassVersionChecker.IsCompatible('conv_elementwise_add_fuse_pass')) if __name__ == "__main__": -- GitLab From fd7ab4e63c5960c0ba6a9c0ce0d00478cbc78c7f Mon Sep 17 00:00:00 2001 From: Pei Yang Date: Fri, 18 Sep 2020 13:39:33 +0800 Subject: [PATCH 004/117] register pass compatibility (#27357) * pass compatibility * add compatibility registry * add unittests for different padding * add assert * drop errmsg --- .../ir/conv_affine_channel_fuse_pass.cc | 12 + .../fluid/framework/ir/conv_bn_fuse_pass.cc | 12 + .../ir/repeated_fc_relu_fuse_pass.cc | 10 + .../ir/shuffle_channel_detect_pass.cc | 8 + .../test_conv_affine_channel_fuse_pass.py | 228 ++++++++++++++++++ .../ir/inference/test_conv_bn_fuse_pass.py | 177 ++++++++++++++ .../test_repeated_fc_relu_fuse_pass.py | 94 ++++++++ .../test_trt_shuffle_channel_detect_pass.py | 51 ++++ 8 files changed, 592 insertions(+) create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_conv_affine_channel_fuse_pass.py create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_conv_bn_fuse_pass.py create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_repeated_fc_relu_fuse_pass.py create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_shuffle_channel_detect_pass.py diff --git a/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc b/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc index b50b4f37cae..fd8b55a6b7d 100644 --- a/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc @@ -18,6 +18,7 @@ #include #include #include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/operators/math/cpu_vec.h" #include "paddle/fluid/platform/enforce.h" @@ -225,3 +226,14 @@ REGISTER_PASS(conv_affine_channel_fuse_pass, paddle::framework::ir::ConvAffineChannelFusePass); REGISTER_PASS(conv_eltwiseadd_affine_channel_fuse_pass, 
paddle::framework::ir::ConvEltwiseAddAffineChannelFusePass); +REGISTER_PASS_CAPABILITY(conv_affine_channel_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("conv2d", 0) + .EQ("affine_channel", 0)); +REGISTER_PASS_CAPABILITY(conv_eltwiseadd_affine_channel_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("conv2d", 0) + .EQ("elementwise_add", 0) + .EQ("affine_channel", 0)); diff --git a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc index 9d3e0806ac7..fb787e08814 100644 --- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc @@ -18,6 +18,7 @@ #include #include #include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/operators/math/cpu_vec.h" #include "paddle/fluid/platform/enforce.h" @@ -372,3 +373,14 @@ REGISTER_PASS(depthwise_conv_bn_fuse_pass, paddle::framework::ir::DepthwiseConvBNFusePass); REGISTER_PASS(depthwise_conv_eltwiseadd_bn_fuse_pass, paddle::framework::ir::DepthwiseConvEltwiseAddBNFusePass); +REGISTER_PASS_CAPABILITY(conv_bn_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("conv2d", 0) + .EQ("batch_norm", 0)); +REGISTER_PASS_CAPABILITY(conv_eltwiseadd_bn_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("conv2d", 0) + .EQ("elementwise_add", 0) + .EQ("batch_norm", 0)); diff --git a/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc b/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc index 2396a7f3c4f..23f794c11c2 100644 --- a/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc @@ -18,6 +18,7 @@ limitations under the License. 
*/ #include #include #include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/op_version_registry.h" #define MAX_NUM_FC 10 @@ -174,6 +175,10 @@ void BuildRepeatedFCReluPattern(PDPattern* pattern, if (x->outputs.size() <= 0 || x->inputs.size() <= 0U) { return false; } + if (x->IsVar() && x->Var() && x->Var()->GetShape().size() > 2) { + LOG(WARNING) << "repeated fc relu only supports input dims = 2"; + return false; + } int fc_idx = FindFCIdx(x); if (fc_idx < 0) { return false; @@ -384,3 +389,8 @@ void RepeatedFCReluFusePass::ApplyImpl(ir::Graph* graph) const { REGISTER_PASS(repeated_fc_relu_fuse_pass, paddle::framework::ir::RepeatedFCReluFusePass); +REGISTER_PASS_CAPABILITY(repeated_fc_relu_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("fc", 0) + .EQ("relu", 0)); diff --git a/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc b/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc index d9a65e71592..74ba0093a17 100644 --- a/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc +++ b/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc @@ -16,6 +16,7 @@ #include "paddle/fluid/framework/ir/graph_viz_pass.h" #include "paddle/fluid/framework/ir/shuffle_channel_detect_pass.h" +#include "paddle/fluid/framework/op_version_registry.h" namespace paddle { namespace framework { @@ -34,6 +35,8 @@ void ShuffleChannelDetectPass::ApplyImpl(ir::Graph* graph) const { const std::string pattern_name = "shufflechannel_pattern"; FusePassBase::Init(pattern_name, graph); + LOG(WARNING) << "There is fluid.layers.shuffle_channel API already, you can " + "use it instead of (reshape + transpose +reshape)"; GraphPatternDetector gpd; auto* x = gpd.mutable_pattern() ->NewNode("x") @@ -93,3 +96,8 @@ void ShuffleChannelDetectPass::ApplyImpl(ir::Graph* graph) const { REGISTER_PASS(shuffle_channel_detect_pass, paddle::framework::ir::ShuffleChannelDetectPass); 
+REGISTER_PASS_CAPABILITY(shuffle_channel_detect_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("reshape2", 0) + .EQ("transpose2", 0)); diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_affine_channel_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_affine_channel_fuse_pass.py new file mode 100644 index 00000000000..ec0bd52e926 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_affine_channel_fuse_pass.py @@ -0,0 +1,228 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import PassVersionChecker + + +class ConvAffineChannelFusePassExplicitPaddingTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 3, 64, 64], dtype="float32") + conv_out = fluid.layers.conv2d( + input=data, + num_filters=3, + filter_size=3, + groups=3, + padding=[1, 1, 1, 1], + bias_attr=False, + act=None) + input_scale = fluid.layers.create_parameter( + shape=[3], dtype="float32") + input_bias = fluid.layers.create_parameter( + shape=[3], dtype="float32") + ac_out = fluid.layers.affine_channel( + x=conv_out, scale=input_scale, bias=input_bias) + + self.feeds = { + "data": np.random.random([1, 3, 64, 64]).astype("float32"), + } + self.fetch_list = [ac_out] + + def test_check_output(self): + self.check_output() + + self.assertTrue( + PassVersionChecker.IsCompatible('conv_affine_channel_fuse_pass')) + + +class ConvAffineChannelFusePassValidPaddingTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 3, 64, 64], dtype="float32") + conv_out = fluid.layers.conv2d( + input=data, + num_filters=3, + filter_size=3, + groups=3, + padding='VALID', + bias_attr=False, + act=None) + input_scale = fluid.layers.create_parameter( + shape=[3], dtype="float32") + input_bias = fluid.layers.create_parameter( + shape=[3], dtype="float32") + ac_out = fluid.layers.affine_channel( + x=conv_out, scale=input_scale, bias=input_bias) + + self.feeds = { + "data": np.random.random([1, 3, 64, 64]).astype("float32"), + } + self.fetch_list = [ac_out] + + def test_check_output(self): + self.check_output() + + self.assertTrue( + PassVersionChecker.IsCompatible('conv_affine_channel_fuse_pass')) + + +class 
ConvAffineChannelFusePassSamePaddingTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 3, 64, 64], dtype="float32") + conv_out = fluid.layers.conv2d( + input=data, + num_filters=3, + filter_size=3, + groups=3, + padding='SAME', + bias_attr=False, + act=None) + input_scale = fluid.layers.create_parameter( + shape=[3], dtype="float32") + input_bias = fluid.layers.create_parameter( + shape=[3], dtype="float32") + ac_out = fluid.layers.affine_channel( + x=conv_out, scale=input_scale, bias=input_bias) + + self.feeds = { + "data": np.random.random([1, 3, 64, 64]).astype("float32"), + } + self.fetch_list = [ac_out] + + def test_check_output(self): + self.check_output() + + self.assertTrue( + PassVersionChecker.IsCompatible('conv_affine_channel_fuse_pass')) + + +class ConvEltwiseAddAffineChannelFusePassExplicitPaddingTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 3, 64, 64], dtype="float32") + param_attr = fluid.ParamAttr( + initializer=fluid.initializer.Xavier(uniform=False), + learning_rate=0.001) + conv_out = fluid.layers.conv2d( + input=data, + num_filters=3, + filter_size=3, + groups=3, + padding=[1, 1, 1, 1], + bias_attr=param_attr, + act=None) + input_scale = fluid.layers.create_parameter( + shape=[3], dtype="float32") + input_bias = fluid.layers.create_parameter( + shape=[3], dtype="float32") + ac_out = fluid.layers.affine_channel( + x=conv_out, scale=input_scale, bias=input_bias) + + self.feeds = { + "data": np.random.random([1, 3, 64, 64]).astype("float32"), + } + self.fetch_list = [ac_out] + + def test_check_output(self): + self.check_output() + + self.assertTrue( + PassVersionChecker.IsCompatible( + 'conv_eltwiseadd_affine_channel_fuse_pass')) + + +class ConvEltwiseAddAffineChannelFusePassValidPaddingTest(InferencePassTest): + def setUp(self): 
+ with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 3, 64, 64], dtype="float32") + param_attr = fluid.ParamAttr( + initializer=fluid.initializer.Xavier(uniform=False), + learning_rate=0.001) + conv_out = fluid.layers.conv2d( + input=data, + num_filters=3, + filter_size=3, + groups=3, + padding='VALID', + bias_attr=param_attr, + act=None) + input_scale = fluid.layers.create_parameter( + shape=[3], dtype="float32") + input_bias = fluid.layers.create_parameter( + shape=[3], dtype="float32") + ac_out = fluid.layers.affine_channel( + x=conv_out, scale=input_scale, bias=input_bias) + + self.feeds = { + "data": np.random.random([1, 3, 64, 64]).astype("float32"), + } + self.fetch_list = [ac_out] + + def test_check_output(self): + self.check_output() + + self.assertTrue( + PassVersionChecker.IsCompatible( + 'conv_eltwiseadd_affine_channel_fuse_pass')) + + +class ConvEltwiseAddAffineChannelFusePassSamePaddingTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 3, 64, 64], dtype="float32") + param_attr = fluid.ParamAttr( + initializer=fluid.initializer.Xavier(uniform=False), + learning_rate=0.001) + conv_out = fluid.layers.conv2d( + input=data, + num_filters=3, + filter_size=3, + groups=3, + padding='Same', + bias_attr=param_attr, + act=None) + input_scale = fluid.layers.create_parameter( + shape=[3], dtype="float32") + input_bias = fluid.layers.create_parameter( + shape=[3], dtype="float32") + ac_out = fluid.layers.affine_channel( + x=conv_out, scale=input_scale, bias=input_bias) + + self.feeds = { + "data": np.random.random([1, 3, 64, 64]).astype("float32"), + } + self.fetch_list = [ac_out] + + def test_check_output(self): + self.check_output() + + self.assertTrue( + PassVersionChecker.IsCompatible( + 'conv_eltwiseadd_affine_channel_fuse_pass')) + + +if __name__ == "__main__": + unittest.main() diff --git 
a/python/paddle/fluid/tests/unittests/ir/inference/test_conv_bn_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_bn_fuse_pass.py new file mode 100644 index 00000000000..ffe177e59d8 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_conv_bn_fuse_pass.py @@ -0,0 +1,177 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import PassVersionChecker + + +class ConvBnFusePassExplicitPaddingTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 3, 64, 64], dtype="float32") + conv_out = fluid.layers.conv2d( + input=data, + num_filters=6, + filter_size=6, + groups=3, + padding=[1, 1, 1, 1], + bias_attr=False, + act=None) + bn_out = fluid.layers.batch_norm(conv_out, is_test=True) + + self.feeds = { + "data": np.random.random([1, 3, 64, 64]).astype("float32"), + } + self.fetch_list = [bn_out] + + def test_check_output(self): + self.check_output() + self.assertTrue(PassVersionChecker.IsCompatible('conv_bn_fuse_pass')) + + +class ConvBnFusePassValidPaddingTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + 
name="data", shape=[-1, 3, 64, 64], dtype="float32") + conv_out = fluid.layers.conv2d( + input=data, + num_filters=6, + filter_size=6, + groups=3, + padding='VALID', + bias_attr=False, + act=None) + bn_out = fluid.layers.batch_norm(conv_out, is_test=True) + + self.feeds = { + "data": np.random.random([1, 3, 64, 64]).astype("float32"), + } + self.fetch_list = [bn_out] + + def test_check_output(self): + self.check_output() + self.assertTrue(PassVersionChecker.IsCompatible('conv_bn_fuse_pass')) + + +class ConvBnFusePassSamePaddingTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 3, 64, 64], dtype="float32") + conv_out = fluid.layers.conv2d( + input=data, + num_filters=6, + filter_size=6, + groups=3, + padding='SAME', + bias_attr=False, + act=None) + bn_out = fluid.layers.batch_norm(conv_out, is_test=True) + + self.feeds = { + "data": np.random.random([1, 3, 64, 64]).astype("float32"), + } + self.fetch_list = [bn_out] + + def test_check_output(self): + self.check_output() + self.assertTrue(PassVersionChecker.IsCompatible('conv_bn_fuse_pass')) + + +class ConvEltwiseAddBnFuseExplicitPaddingPass(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 3, 64, 64], dtype="float32") + conv_out = fluid.layers.conv2d( + input=data, + num_filters=6, + filter_size=6, + groups=3, + padding=[1, 1, 1, 1], + bias_attr=None, + act=None) + bn_out = fluid.layers.batch_norm(conv_out, is_test=True) + + self.feeds = { + "data": np.random.random([1, 3, 64, 64]).astype("float32"), + } + self.fetch_list = [bn_out] + + def test_check_output(self): + self.check_output() + self.assertTrue( + PassVersionChecker.IsCompatible('conv_eltwiseadd_bn_fuse_pass')) + + +class ConvEltwiseAddBnFuseValidPaddingPass(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, 
self.startup_program): + data = fluid.data( + name="data", shape=[-1, 3, 64, 64], dtype="float32") + conv_out = fluid.layers.conv2d( + input=data, + num_filters=6, + filter_size=6, + groups=3, + padding='VALID', + bias_attr=None, + act=None) + bn_out = fluid.layers.batch_norm(conv_out, is_test=True) + + self.feeds = { + "data": np.random.random([1, 3, 64, 64]).astype("float32"), + } + self.fetch_list = [bn_out] + + def test_check_output(self): + self.check_output() + self.assertTrue( + PassVersionChecker.IsCompatible('conv_eltwiseadd_bn_fuse_pass')) + + +class ConvEltwiseAddBnFuseSamePaddingPass(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 3, 64, 64], dtype="float32") + conv_out = fluid.layers.conv2d( + input=data, + num_filters=6, + filter_size=6, + groups=3, + padding='SAME', + bias_attr=None, + act=None) + bn_out = fluid.layers.batch_norm(conv_out, is_test=True) + + self.feeds = { + "data": np.random.random([1, 3, 64, 64]).astype("float32"), + } + self.fetch_list = [bn_out] + + def test_check_output(self): + self.check_output() + self.assertTrue( + PassVersionChecker.IsCompatible('conv_eltwiseadd_bn_fuse_pass')) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_repeated_fc_relu_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_repeated_fc_relu_fuse_pass.py new file mode 100644 index 00000000000..c78884480da --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_repeated_fc_relu_fuse_pass.py @@ -0,0 +1,94 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import PassVersionChecker + + +class RepeatedFcReluFusePass3Test(InferencePassTest): + def setUp(self): + fc_num = 3 + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 3, 64, 64], dtype="float32") + param_attr = fluid.ParamAttr( + initializer=fluid.initializer.Xavier(uniform=False), + learning_rate=0.001) + conv_out = fluid.layers.conv2d( + input=data, + num_filters=3, + filter_size=3, + bias_attr=param_attr, + act=None) + fc_outs = [] + fc_outs.append( + fluid.layers.fc(input=[conv_out], act="relu", size=1000)) + for i in range(1, fc_num): + fc_outs.append( + fluid.layers.fc( + input=[fc_outs[i - 1]], act="relu", size=1000)) + self.feeds = { + "data": np.random.random([1, 3, 64, 64]).astype("float32"), + } + self.fetch_list = [fc_outs[fc_num - 1]] + + def test_check_output(self): + use_gpu = False + self.check_output_with_option(use_gpu) + + self.assertTrue( + PassVersionChecker.IsCompatible('repeated_fc_relu_fuse_pass')) + + +class RepeatedFcReluFusePass9Test(InferencePassTest): + def setUp(self): + fc_num = 9 + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 3, 64, 64], dtype="float32") + param_attr = fluid.ParamAttr( + initializer=fluid.initializer.Xavier(uniform=False), + learning_rate=0.001) + conv_out = fluid.layers.conv2d( + input=data, + 
num_filters=3, + filter_size=3, + bias_attr=param_attr, + act=None) + fc_outs = [] + fc_outs.append( + fluid.layers.fc(input=[conv_out], act="relu", size=1000)) + for i in range(1, fc_num): + fc_outs.append( + fluid.layers.fc( + input=[fc_outs[i - 1]], act="relu", size=1000)) + self.feeds = { + "data": np.random.random([1, 3, 64, 64]).astype("float32"), + } + self.fetch_list = [fc_outs[fc_num - 1]] + + def test_check_output(self): + use_gpu = False + self.check_output_with_option(use_gpu) + + self.assertTrue( + PassVersionChecker.IsCompatible('repeated_fc_relu_fuse_pass')) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_shuffle_channel_detect_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_shuffle_channel_detect_pass.py new file mode 100644 index 00000000000..e9c304496af --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_shuffle_channel_detect_pass.py @@ -0,0 +1,51 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import PassVersionChecker +from paddle.fluid.core import AnalysisConfig + + +class ShuffleChannelFuseTRTPassTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 6, 64, 64], dtype="float32") + reshape1 = fluid.layers.reshape(x=data, shape=[-1, 2, 3, 64, 64]) + trans = fluid.layers.transpose(x=reshape1, perm=[0, 2, 1, 3, 4]) + reshape2 = fluid.layers.reshape(x=trans, shape=[-1, 6, 64, 64]) + out = fluid.layers.batch_norm(reshape2, is_test=True) + + self.feeds = { + "data": np.random.random([1, 6, 64, 64]).astype("float32"), + } + self.enable_trt = True + self.trt_parameters = ShuffleChannelFuseTRTPassTest.TensorRTParam( + 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) + self.fetch_list = [out] + + def test_check_output(self): + + self.check_output() + + self.assertTrue( + PassVersionChecker.IsCompatible('shuffle_channel_detect_pass')) + + +if __name__ == "__main__": + unittest.main() -- GitLab From 9fdcfe89819ec3f0ba13a4fb3126a836a4b36a37 Mon Sep 17 00:00:00 2001 From: guofei <52460041+gfwm2013@users.noreply.github.com> Date: Fri, 18 Sep 2020 15:45:59 +0800 Subject: [PATCH 005/117] Support python3.8 (#26850) * Support python3.8 test=notest --- paddle/scripts/paddle_build.sh | 45 +++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 3de577d847d..ac89116fc49 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -121,6 +121,18 @@ function cmake_base() { else exit 1 fi + elif [ "$1" == "cp38-cp38" ]; then + if [ -d "/Library/Frameworks/Python.framework/Versions/3.8" ]; then + export 
LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/3.8/lib/ + export DYLD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/3.8/lib/ + export PATH=/Library/Frameworks/Python.framework/Versions/3.8/bin/:${PATH} + PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.8/bin/python3 + -DPYTHON_INCLUDE_DIR:PATH=/Library/Frameworks/Python.framework/Versions/3.8/include/python3.8/ + -DPYTHON_LIBRARY:FILEPATH=/Library/Frameworks/Python.framework/Versions/3.8/lib/libpython3.8.dylib" + pip3.8 install --user -r ${PADDLE_ROOT}/python/requirements.txt + else + exit 1 + fi fi # delete `gym` to avoid modifying requirements.txt in *.whl sed -i .bak "/^gym$/d" ${PADDLE_ROOT}/python/requirements.txt @@ -176,6 +188,13 @@ function cmake_base() { -DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.7.0/include/python3.7m -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.7.0/lib/libpython3.so" pip3.7 install -r ${PADDLE_ROOT}/python/requirements.txt + elif [ "$1" == "cp38-cp38" ]; then + export LD_LIBRARY_PATH=/opt/_internal/cpython-3.8.0/lib/:${LD_LIBRARY_PATH} + export PATH=/opt/_internal/cpython-3.8.0/bin/:${PATH} + export PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/_internal/cpython-3.8.0/bin/python3.8 + -DPYTHON_INCLUDE_DIR:PATH=/opt/_internal/cpython-3.8.0/include/python3.8 + -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-3.8.0/lib/libpython3.so" + pip3.8 install -r ${PADDLE_ROOT}/python/requirements.txt fi else pip install -r ${PADDLE_ROOT}/python/requirements.txt @@ -514,6 +533,8 @@ EOF pip3.6 uninstall -y paddlepaddle elif [ "$1" == "cp37-cp37m" ]; then pip3.7 uninstall -y paddlepaddle + elif [ "$1" == "cp38-cp38" ]; then + pip3.8 uninstall -y paddlepaddle fi set -ex @@ -527,6 +548,8 @@ EOF pip3.6 install --user ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl elif [ "$1" == "cp37-cp37m" ]; then pip3.7 install --user ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl + elif [ "$1" == 
"cp38-cp38" ]; then + pip3.8 install --user ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl fi tmpfile_rand=`date +%s%N` tmpfile=$tmp_dir/$tmpfile_rand @@ -666,7 +689,7 @@ function generate_api_spec() { awk -F '(' '{print $NF}' $spec_path >${spec_path}.doc awk -F '(' '{$NF="";print $0}' $spec_path >${spec_path}.api - if [ "$1" == "cp35-cp35m" ] || [ "$1" == "cp36-cp36m" ] || [ "$1" == "cp37-cp37m" ]; then + if [ "$1" == "cp35-cp35m" ] || [ "$1" == "cp36-cp36m" ] || [ "$1" == "cp37-cp37m" ] || [ "$1" == "cp38-cp38" ]; then # Use sed to make python2 and python3 sepc keeps the same sed -i 's/arg0: str/arg0: unicode/g' $spec_path sed -i "s/\(.*Transpiler.*\).__init__ (ArgSpec(args=\['self'].*/\1.__init__ /g" $spec_path @@ -1244,21 +1267,25 @@ EOF ref_paddle35=paddlepaddle${install_gpu}-${PADDLE_BRANCH}-cp35-cp35m-linux_x86_64.whl ref_paddle36=paddlepaddle${install_gpu}-${PADDLE_BRANCH}-cp36-cp36m-linux_x86_64.whl ref_paddle37=paddlepaddle${install_gpu}-${PADDLE_BRANCH}-cp37-cp37m-linux_x86_64.whl + ref_paddle38=paddlepaddle${install_gpu}-${PADDLE_BRANCH}-cp38-cp38-linux_x86_64.whl ref_paddle2_whl=paddlepaddle${install_gpu}-${PADDLE_BRANCH}-cp27-cp27mu-linux_x86_64.whl ref_paddle35_whl=paddlepaddle${install_gpu}-${PADDLE_BRANCH}-cp35-cp35m-linux_x86_64.whl ref_paddle36_whl=paddlepaddle${install_gpu}-${PADDLE_BRANCH}-cp36-cp36m-linux_x86_64.whl ref_paddle37_whl=paddlepaddle${install_gpu}-${PADDLE_BRANCH}-cp37-cp37m-linux_x86_64.whl + ref_paddle38_whl=paddlepaddle${install_gpu}-${PADDLE_BRANCH}-cp38-cp38-linux_x86_64.whl if [[ ${PADDLE_BRANCH} != "0.0.0" && ${WITH_MKL} == "ON" && ${WITH_GPU} == "ON" ]]; then ref_paddle2=paddlepaddle${install_gpu}-${PADDLE_BRANCH}.post${ref_CUDA_MAJOR}${CUDNN_MAJOR}-cp27-cp27mu-linux_x86_64.whl ref_paddle35=paddlepaddle${install_gpu}-${PADDLE_BRANCH}.post${ref_CUDA_MAJOR}${CUDNN_MAJOR}-cp35-cp35m-linux_x86_64.whl 
ref_paddle36=paddlepaddle${install_gpu}-${PADDLE_BRANCH}.post${ref_CUDA_MAJOR}${CUDNN_MAJOR}-cp36-cp36m-linux_x86_64.whl ref_paddle37=paddlepaddle${install_gpu}-${PADDLE_BRANCH}.post${ref_CUDA_MAJOR}${CUDNN_MAJOR}-cp37-cp37m-linux_x86_64.whl + ref_paddle38=paddlepaddle${install_gpu}-${PADDLE_BRANCH}.post${ref_CUDA_MAJOR}${CUDNN_MAJOR}-cp38-cp38-linux_x86_64.whl ref_paddle2_whl=paddlepaddle${install_gpu}-${PADDLE_BRANCH}.post${ref_CUDA_MAJOR}${CUDNN_MAJOR}-cp27-cp27mu-linux_x86_64.whl ref_paddle35_whl=paddlepaddle${install_gpu}-${PADDLE_BRANCH}.post${ref_CUDA_MAJOR}${CUDNN_MAJOR}-cp35-cp35m-linux_x86_64.whl ref_paddle36_whl=paddlepaddle${install_gpu}-${PADDLE_BRANCH}.post${ref_CUDA_MAJOR}${CUDNN_MAJOR}-cp36-cp36m-linux_x86_64.whl ref_paddle37_whl=paddlepaddle${install_gpu}-${PADDLE_BRANCH}.post${ref_CUDA_MAJOR}${CUDNN_MAJOR}-cp37-cp37m-linux_x86_64.whl + ref_paddle38_whl=paddlepaddle${install_gpu}-${PADDLE_BRANCH}.post${ref_CUDA_MAJOR}${CUDNN_MAJOR}-cp38-cp38-linux_x86_64.whl fi #ref_paddle2_mv1="" @@ -1363,6 +1390,22 @@ EOF apt-get clean -y && \ rm -f ${ref_paddle37} && \ ldconfig +EOF + cat >> ${PADDLE_ROOT}/build/Dockerfile < /dev/null && \ + make -j8 > /dev/null && make altinstall > /dev/null && cd ../ && rm Python-3.8.0.tgz + RUN apt-get install -y libgtk2.0-dev dmidecode python3-tk && ldconfig && \ + pip3.8 install opencv-python && wget ${ref_web}/${ref_paddle38} && pip3.8 install ${ref_paddle38_whl}; apt-get install -f -y && \ + apt-get clean -y && \ + rm -f ${ref_paddle38} && \ + ldconfig EOF cat >> ${PADDLE_ROOT}/build/Dockerfile < Date: Fri, 18 Sep 2020 16:18:00 +0800 Subject: [PATCH 006/117] enhance dataset err msg (#27363) --- paddle/fluid/framework/data_feed.cc | 70 ++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/framework/data_feed.cc b/paddle/fluid/framework/data_feed.cc index 96d54ec8691..aec27bd9d91 100644 --- a/paddle/fluid/framework/data_feed.cc +++ b/paddle/fluid/framework/data_feed.cc @@ 
-527,6 +527,8 @@ bool MultiSlotDataFeed::CheckFile(const char* filename) { VLOG(0) << "error: the number of ids is a negative number: " << num; VLOG(0) << "please check line<" << instance_cout << "> in file<" << filename << ">"; + VLOG(0) << "Error occured when parsing " << i + << " th slot with total slots number: " << all_slots_.size(); return false; } else if (num == 0) { VLOG(0) @@ -536,42 +538,66 @@ bool MultiSlotDataFeed::CheckFile(const char* filename) { "characters."; VLOG(0) << "please check line<" << instance_cout << "> in file<" << filename << ">"; + VLOG(0) << "Error occured when parsing " << i + << " th slot with total slots number: " << all_slots_.size(); return false; } else if (errno == ERANGE || num > INT_MAX) { VLOG(0) << "error: the number of ids greater than INT_MAX"; VLOG(0) << "please check line<" << instance_cout << "> in file<" << filename << ">"; + VLOG(0) << "Error occured when parsing " << i + << " th slot with total slots number: " << all_slots_.size(); return false; } if (all_slots_type_[i] == "float") { - for (int i = 0; i < num; ++i) { + for (int j = 0; j < num; ++j) { strtof(endptr, &endptr); if (errno == ERANGE) { VLOG(0) << "error: the value is out of the range of " "representable values for float"; VLOG(0) << "please check line<" << instance_cout << "> in file<" << filename << ">"; + VLOG(0) << "Error occured when parsing " << i + << " th slot with total slots number: " + << all_slots_.size(); + VLOG(0) << "and in this slot: " << j + << " th id with total id number: " << num; return false; } - if (i + 1 != num && endptr - str == len) { + if (j + 1 != num && endptr - str == len) { VLOG(0) << "error: there is a wrong with the number of ids."; + VLOG(0) << "Error occured when parsing " << i + << " th slot with total slots number: " + << all_slots_.size(); + VLOG(0) << "and in this slot: " << j + << " th id with total id number: " << num; VLOG(0) << "please check line<" << instance_cout << "> in file<" << filename << ">"; return 
false; } } } else if (all_slots_type_[i] == "uint64") { - for (int i = 0; i < num; ++i) { + for (int j = 0; j < num; ++j) { strtoull(endptr, &endptr, 10); if (errno == ERANGE) { VLOG(0) << "error: the value is out of the range of " "representable values for uint64_t"; + VLOG(0) << "Error occured when parsing " << i + << " th slot with total slots number: " + << all_slots_.size(); + VLOG(0) << "and in this slot: " << j + << " th id with total id number: " << num; VLOG(0) << "please check line<" << instance_cout << "> in file<" << filename << ">"; return false; } - if (i + 1 != num && endptr - str == len) { + if (j + 1 != num && endptr - str == len) { VLOG(0) << "error: there is a wrong with the number of ids."; + VLOG(0) << "Error occured when parsing " << i + << " th slot with total slots number: " + << all_slots_.size(); + VLOG(0) << "and in this slot: " << j + << " th id with total id number: " << num; VLOG(0) << "please check line<" << instance_cout << "> in file<" << filename << ">"; return false; @@ -632,8 +658,13 @@ bool MultiSlotDataFeed::ParseOneInstanceFromPipe( "The number of ids can not be zero, you need padding " "it in data generator; or if there is something wrong with " "the data, please check if the data contains unresolvable " - "characters.\nplease check this error line: %s", - str)); + "characters.\nplease check this error line: %s, \n Specifically, " + "something wrong happened(the length of this slot's feasign is 0)" + "when we parse the %d th slots." 
+ "Maybe something wrong around this slot", + "\nWe detect the feasign number of this slot is %d, " + "which is illegal.", + str, i, num)); if (idx != -1) { (*instance)[idx].Init(all_slots_type_[i]); if ((*instance)[idx].GetType()[0] == 'f') { // float @@ -683,8 +714,13 @@ bool MultiSlotDataFeed::ParseOneInstance(std::vector* instance) { "The number of ids can not be zero, you need padding " "it in data generator; or if there is something wrong with " "the data, please check if the data contains unresolvable " - "characters.\nplease check this error line: %s.", - str)); + "characters.\nplease check this error line: %s, \n Specifically, " + "something wrong happened(the length of this slot's feasign is 0)" + "when we parse the %d th slots." + "Maybe something wrong around this slot", + "\nWe detect the feasign number of this slot is %d, " + "which is illegal.", + str, i, num)); if (idx != -1) { (*instance)[idx].Init(all_slots_type_[i]); @@ -916,8 +952,13 @@ bool MultiSlotInMemoryDataFeed::ParseOneInstanceFromPipe(Record* instance) { "The number of ids can not be zero, you need padding " "it in data generator; or if there is something wrong with " "the data, please check if the data contains unresolvable " - "characters.\nplease check this error line: %s.", - str)); + "characters.\nplease check this error line: %s, \n Specifically, " + "something wrong happened(the length of this slot's feasign is 0)" + "when we parse the %d th slots." 
+ "Maybe something wrong around this slot", + "\nWe detect the feasign number of this slot is %d, " + "which is illegal.", + str, i, num)); if (idx != -1) { if (all_slots_type_[i][0] == 'f') { // float for (int j = 0; j < num; ++j) { @@ -982,8 +1023,13 @@ bool MultiSlotInMemoryDataFeed::ParseOneInstance(Record* instance) { "The number of ids can not be zero, you need padding " "it in data generator; or if there is something wrong with " "the data, please check if the data contains unresolvable " - "characters.\nplease check this error line: %s.", - str)); + "characters.\nplease check this error line: %s, \n Specifically, " + "something wrong happened(the length of this slot's feasign is 0)" + "when we parse the %d th slots." + "Maybe something wrong around this slot", + "\nWe detect the feasign number of this slot is %d, " + "which is illegal.", + str, i, num)); if (idx != -1) { if (all_slots_type_[i][0] == 'f') { // float -- GitLab From 4c5cfdea1b4fee23c0d3dfdcd93d2e0731790727 Mon Sep 17 00:00:00 2001 From: liu zhengxi <380185688@qq.com> Date: Fri, 18 Sep 2020 16:18:49 +0800 Subject: [PATCH 007/117] fix paddle.nn.Transformer api (#27391) --- .../tests/unittests/test_transformer_api.py | 135 ++++++++++++++++++ python/paddle/nn/layer/transformer.py | 102 ++++++++++--- 2 files changed, 217 insertions(+), 20 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_transformer_api.py b/python/paddle/fluid/tests/unittests/test_transformer_api.py index 5fea9f69a18..bd76edc9d8c 100644 --- a/python/paddle/fluid/tests/unittests/test_transformer_api.py +++ b/python/paddle/fluid/tests/unittests/test_transformer_api.py @@ -474,6 +474,141 @@ class TestTransformer(unittest.TestCase): trans_output = transformer(src, tgt, src_mask, tgt_mask, memory_mask) + def test_transformer_attr_1(self): + batch_size, d_model, n_head, dim_feedforward, dropout, _, _, source_length, target_length = generate_basic_params( + mode="decoder_layer") + + # batch_size, source_length, 
target_length, d_model, n_head = 4, 8, 8, 64, 8 + with fluid.dygraph.guard(fluid.CPUPlace()): + transformer = Transformer( + d_model, + n_head, + dim_feedforward=dim_feedforward, + dropout=dropout, + weight_attr=[None], + bias_attr=[False]) + src = paddle.to_variable( + np.random.rand(batch_size, source_length, d_model).astype( + "float32")) + tgt = paddle.to_variable( + np.random.rand(batch_size, target_length, d_model).astype( + "float32")) + src_mask = np.zeros((batch_size, n_head, source_length, + source_length)).astype("float32") + src_mask[0][0][0][0] = -np.inf + src_mask = paddle.to_variable(src_mask) + tgt_mask = np.zeros((batch_size, n_head, target_length, + target_length)).astype("float32") + tgt_mask[0][0][0][0] = -1e9 + memory_mask = np.zeros((batch_size, n_head, target_length, + source_length)).astype("float32") + memory_mask[0][0][0][0] = -1e9 + tgt_mask, memory_mask = paddle.to_variable( + tgt_mask), paddle.to_variable(memory_mask) + trans_output = transformer(src, tgt, src_mask, tgt_mask, + memory_mask) + + def test_transformer_attr_2(self): + batch_size, d_model, n_head, dim_feedforward, dropout, _, _, source_length, target_length = generate_basic_params( + mode="decoder_layer") + + # batch_size, source_length, target_length, d_model, n_head = 4, 8, 8, 64, 8 + with fluid.dygraph.guard(fluid.CPUPlace()): + transformer = Transformer( + d_model, + n_head, + dim_feedforward=dim_feedforward, + dropout=dropout, + weight_attr=[None, None], + bias_attr=[False, False]) + src = paddle.to_variable( + np.random.rand(batch_size, source_length, d_model).astype( + "float32")) + tgt = paddle.to_variable( + np.random.rand(batch_size, target_length, d_model).astype( + "float32")) + src_mask = np.zeros((batch_size, n_head, source_length, + source_length)).astype("float32") + src_mask[0][0][0][0] = -np.inf + src_mask = paddle.to_variable(src_mask) + tgt_mask = np.zeros((batch_size, n_head, target_length, + target_length)).astype("float32") + tgt_mask[0][0][0][0] = 
-1e9 + memory_mask = np.zeros((batch_size, n_head, target_length, + source_length)).astype("float32") + memory_mask[0][0][0][0] = -1e9 + tgt_mask, memory_mask = paddle.to_variable( + tgt_mask), paddle.to_variable(memory_mask) + trans_output = transformer(src, tgt, src_mask, tgt_mask, + memory_mask) + + def test_transformer_attr_3(self): + batch_size, d_model, n_head, dim_feedforward, dropout, _, _, source_length, target_length = generate_basic_params( + mode="decoder_layer") + + # batch_size, source_length, target_length, d_model, n_head = 4, 8, 8, 64, 8 + with fluid.dygraph.guard(fluid.CPUPlace()): + transformer = Transformer( + d_model, + n_head, + dim_feedforward=dim_feedforward, + dropout=dropout, + weight_attr=[None, None, None], + bias_attr=[False, False, True]) + src = paddle.to_variable( + np.random.rand(batch_size, source_length, d_model).astype( + "float32")) + tgt = paddle.to_variable( + np.random.rand(batch_size, target_length, d_model).astype( + "float32")) + src_mask = np.zeros((batch_size, n_head, source_length, + source_length)).astype("float32") + src_mask[0][0][0][0] = -np.inf + src_mask = paddle.to_variable(src_mask) + tgt_mask = np.zeros((batch_size, n_head, target_length, + target_length)).astype("float32") + tgt_mask[0][0][0][0] = -1e9 + memory_mask = np.zeros((batch_size, n_head, target_length, + source_length)).astype("float32") + memory_mask[0][0][0][0] = -1e9 + tgt_mask, memory_mask = paddle.to_variable( + tgt_mask), paddle.to_variable(memory_mask) + trans_output = transformer(src, tgt, src_mask, tgt_mask, + memory_mask) + + def test_transformer_attr_boolean(self): + batch_size, d_model, n_head, dim_feedforward, dropout, _, _, source_length, target_length = generate_basic_params( + mode="decoder_layer") + + # batch_size, source_length, target_length, d_model, n_head = 4, 8, 8, 64, 8 + with fluid.dygraph.guard(fluid.CPUPlace()): + transformer = Transformer( + d_model, + n_head, + dim_feedforward=dim_feedforward, + dropout=dropout, + 
bias_attr=False) + src = paddle.to_variable( + np.random.rand(batch_size, source_length, d_model).astype( + "float32")) + tgt = paddle.to_variable( + np.random.rand(batch_size, target_length, d_model).astype( + "float32")) + src_mask = np.zeros((batch_size, n_head, source_length, + source_length)).astype("float32") + src_mask[0][0][0][0] = -np.inf + src_mask = paddle.to_variable(src_mask) + tgt_mask = np.zeros((batch_size, n_head, target_length, + target_length)).astype("float32") + tgt_mask[0][0][0][0] = -1e9 + memory_mask = np.zeros((batch_size, n_head, target_length, + source_length)).astype("float32") + memory_mask[0][0][0][0] = -1e9 + tgt_mask, memory_mask = paddle.to_variable( + tgt_mask), paddle.to_variable(memory_mask) + trans_output = transformer(src, tgt, src_mask, tgt_mask, + memory_mask) + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/nn/layer/transformer.py b/python/paddle/nn/layer/transformer.py index 63069e83952..4b199d5816c 100644 --- a/python/paddle/nn/layer/transformer.py +++ b/python/paddle/nn/layer/transformer.py @@ -53,7 +53,22 @@ def _convert_param_attr_to_list(param_attr, n): if isinstance(param_attr, (list, tuple)): assert len(param_attr) == n, ( "length of param_attr should be %d when it is a list/tuple" % n) - param_attrs = [ParamAttr._to_attr(attr) for attr in param_attr] + param_attrs = [] + for attr in param_attr: + if isinstance(attr, bool): + if attr: + param_attrs.append(ParamAttr._to_attr(None)) + else: + param_attrs.append(False) + else: + param_attrs.append(ParamAttr._to_attr(attr)) + # param_attrs = [ParamAttr._to_attr(attr) for attr in param_attr] + elif isinstance(param_attr, bool): + param_attrs = [] + if param_attr: + param_attrs = [ParamAttr._to_attr(None) for i in range(n)] + else: + param_attrs = [False] * n else: param_attrs = [] attr = ParamAttr._to_attr(param_attr) @@ -417,7 +432,7 @@ class TransformerEncoderLayer(Layer): Otherwise, MHA and FFN both use it as `weight_attr` to create parameters. 
Default: None, which means the default weight parameter property is used. See usage for details in :code:`ParamAttr` . - bias_attr (ParamAttr|tuple, optional): To specify the bias parameter property. + bias_attr (ParamAttr|tuple|bool, optional): To specify the bias parameter property. If it is a tuple, `bias_attr[0]` would be used as `bias_attr` for MHA, and `bias_attr[1]` would be used as `bias_attr` for linear in FFN. Otherwise, MHA and FFN both use it as `bias_attr` to create parameters. @@ -986,22 +1001,31 @@ class Transformer(Layer): Otherwise, no pre-process and post-precess includes dropout, residual connection, layer normalization. Default False weight_attr(ParamAttr|tuple, optional): To specify the weight parameter property. - If it is a tuple, `weight_attr[0]` would be used as `weight_attr` for - self attention, `weight_attr[1]` would be used as `weight_attr` for - cross attention, and `weight_attr[2]` would be used as `weight_attr` - for linear in FFN. Otherwise, the three sub-layers all uses it as - `weight_attr` to create parameters. Default: None, which means the - default weight parameter property is used. See usage for details + If it is a tuple, the length of `weight_attr` could be 1, 2 or 3. If it is 3, + `weight_attr[0]` would be used as `weight_attr` for self attention, `weight_attr[1]` + would be used as `weight_attr` for cross attention of `TransformerDecoder`, + and `weight_attr[2]` would be used as `weight_attr` for linear in FFN. + If it is 2, `weight_attr[0]` would be used as `weight_attr` both for self attention + and cross attntion and `weight_attr[1]` would be used as `weight_attr` for + linear in FFN. If it is 1, `weight_attr[0]` would be used as `weight_attr` + for self attention, cross attention and linear in FFN. Otherwise, + the three sub-layers all uses it as `weight_attr` to create parameters. + Default: None, which means the default weight parameter property is used. + See usage for details in :code:`ParamAttr` . 
bias_attr (ParamAttr|tuple, optional): To specify the bias parameter property. - If it is a tuple, `bias_attr[0]` would be used as `bias_attr` for - self attention, `bias_attr[1]` would be used as `bias_attr` for - cross attention, and `bias_attr[2]` would be used as `bias_attr` - for linear in FFN. Otherwise, the three sub-layers all uses it as - `bias_attr` to create parameters. The `False` value means the - corresponding layer would not have trainable bias parameter. See - usage for details in :code:`ParamAttr` . Default: None,which means - the default bias parameter property is used. + If it is a tuple, the length of `bias_attr` could be 1, 2 or 3. If it is 3, + `bias_attr[0]` would be used as `bias_attr` for self attention, `bias_attr[1]` + would be used as `bias_attr` for cross attention of `TransformerDecoder`, + and `bias_attr[2]` would be used as `bias_attr` for linear in FFN. + If it is 2, `bias_attr[0]` would be used as `bias_attr` both for self attention + and cross attntion and `bias_attr[1]` would be used as `bias_attr` for + linear in FFN. If it is 1, `bias_attr[0]` would be used as `bias_attr` + for self attention, cross attention and linear in FFN. Otherwise, + the three sub-layers all uses it as `bias_attr` to create parameters. + The `False` value means the corresponding layer would not have trainable + bias parameter. See usage for details in :code:`ParamAttr` . + Default: None,which means the default bias parameter property is used. custom_encoder (Layer): If custom encoder is provided, use it as the encoder. Default None custom_decoder (Layer): If custom decoder is provided, use it as the decoder. 
@@ -1049,13 +1073,51 @@ class Transformer(Layer): custom_decoder=None): super(Transformer, self).__init__() + if isinstance(bias_attr, (list, tuple)): + if len(bias_attr) == 1: + encoder_bias_attr = [bias_attr[0]] * 2 + decoder_bias_attr = [bias_attr[0]] * 3 + elif len(bias_attr) == 2: + encoder_bias_attr = bias_attr + decoder_bias_attr = [bias_attr[0], bias_attr[0], bias_attr[-1]] + elif len(bias_attr) == 3: + encoder_bias_attr = [bias_attr[0], bias_attr[-1]] + decoder_bias_attr = bias_attr + else: + assert False, ( + "length of bias_attr should be 1 or 2 or 3 when it is a list/tuple" + ) + else: + encoder_bias_attr = bias_attr + decoder_bias_attr = bias_attr + + if isinstance(weight_attr, (list, tuple)): + if len(weight_attr) == 1: + encoder_weight_attr = [weight_attr[0]] * 2 + decoder_weight_attr = [weight_attr[0]] * 3 + elif len(weight_attr) == 2: + encoder_weight_attr = weight_attr + decoder_weight_attr = [ + weight_attr[0], weight_attr[0], weight_attr[-1] + ] + elif len(weight_attr) == 3: + encoder_weight_attr = [weight_attr[0], weight_attr[-1]] + decoder_weight_attr = weight_attr + else: + assert False, ( + "length of weight_attr should be 1 or 2 or 3 when it is a list/tuple" + ) + else: + encoder_weight_attr = weight_attr + decoder_weight_attr = weight_attr + if custom_encoder is not None: self.encoder = custom_encoder else: encoder_layer = TransformerEncoderLayer( d_model, nhead, dim_feedforward, dropout, activation, - attn_dropout, act_dropout, normalize_before, weight_attr, - bias_attr) + attn_dropout, act_dropout, normalize_before, + encoder_weight_attr, encoder_bias_attr) encoder_norm = LayerNorm(d_model) self.encoder = TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm) @@ -1065,8 +1127,8 @@ class Transformer(Layer): else: decoder_layer = TransformerDecoderLayer( d_model, nhead, dim_feedforward, dropout, activation, - attn_dropout, act_dropout, normalize_before, weight_attr, - bias_attr) + attn_dropout, act_dropout, normalize_before, + 
decoder_weight_attr, decoder_bias_attr) decoder_norm = LayerNorm(d_model) self.decoder = TransformerDecoder(decoder_layer, num_decoder_layers, decoder_norm) -- GitLab From a5ef246cac5012506861c8d54230c5024f891fc8 Mon Sep 17 00:00:00 2001 From: Pei Yang Date: Fri, 18 Sep 2020 16:33:24 +0800 Subject: [PATCH 008/117] Optimize emb_eltwise_layernorm_plugin and support fp16 (#27128) --- cmake/cuda.cmake | 3 + .../tensorrt/convert/emb_eltwise_layernorm.cc | 6 +- .../plugin/emb_eltwise_layernorm_plugin.cu | 214 +++++++++++------- .../plugin/emb_eltwise_layernorm_plugin.h | 178 ++++++++++++--- ...rt_dynamic_shape_ernie_deserialize_test.cc | 10 +- 5 files changed, 287 insertions(+), 124 deletions(-) diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 7a94bda0f5f..c78fe5f6c7f 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -107,6 +107,9 @@ function(select_nvcc_arch_flags out_variable) elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell") set(cuda_arch_bin "50") elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal") + if (NOT ${CMAKE_CUDA_COMPILER_VERSION} LESS 10.0) + add_definitions("-DSUPPORTS_CUDA_FP16") + endif() set(cuda_arch_bin "60 61") elseif(${CUDA_ARCH_NAME} STREQUAL "Volta") if (NOT ${CMAKE_CUDA_COMPILER_VERSION} LESS 10.0) diff --git a/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc b/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc index cdc0e415d46..9fff558c583 100644 --- a/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc +++ b/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc @@ -80,10 +80,10 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter { nvinfer1::ILayer* layer = nullptr; if (engine_->with_dynamic_shape()) { - plugin::DynamicPluginTensorRT* plugin = nullptr; - plugin = new plugin::EmbEltwiseLayernormPluginDynamic( + auto use_fp16 = engine_->WithFp16(); + auto plugin = new plugin::EmbEltwiseLayernormPluginDynamic( input_embs, bias, scale, emb_sizes, bias_size, scale_size, hidden, - eps); + 
eps, use_fp16); layer = engine_->AddPluginV2(input_ids.data(), input_num, plugin); } else { PADDLE_THROW(platform::errors::Fatal( diff --git a/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu index 5e43be90de3..873631fea61 100644 --- a/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu @@ -32,13 +32,34 @@ namespace plugin { #if IS_TRT_VERSION_GE(6000) template -int EmbEltwiseLayernormPluginDynamic::initialize() { +EmbEltwiseLayernormPluginDynamicImpl< + T>::~EmbEltwiseLayernormPluginDynamicImpl() { + this->terminate(); +} + +inline half fp32tofp16(float x) { return static_cast(x); } + +template +int EmbEltwiseLayernormPluginDynamicImpl::initialize() { embs_gpu_.resize(embs_.size()); for (int i = 0; i < embs_.size(); i++) { if (embs_[i]) { - cudaMalloc(&embs_gpu_[i], sizeof(float) * emb_sizes_[i]); - cudaMemcpy(embs_gpu_[i], embs_[i], emb_sizes_[i] * sizeof(float), + T *host_ptr; + auto size = emb_sizes_[i]; + + if (std::is_same::value) { + host_ptr = new T[size]; + std::transform(embs_[i], (embs_[i] + size), host_ptr, fp32tofp16); + } else { + host_ptr = reinterpret_cast(embs_[i]); + } + + cudaMalloc(&embs_gpu_[i], sizeof(T) * size); + cudaMemcpy(embs_gpu_[i], host_ptr, size * sizeof(T), cudaMemcpyHostToDevice); + if (std::is_same::value) { + delete[] host_ptr; + } } } @@ -53,11 +74,105 @@ int EmbEltwiseLayernormPluginDynamic::initialize() { cudaMemcpyHostToDevice); } + int input_num = embs_.size(); + in_ptr_tensor_.Resize({input_num}); + emb_ptr_tensor_.Resize({input_num}); + + cudaGetDevice(&device_id_); + auto emb_ptr_gpu_d = + emb_ptr_tensor_.mutable_data(platform::CUDAPlace(device_id_)); + cudaMemcpy(emb_ptr_gpu_d, embs_gpu_.data(), sizeof(uintptr_t) * input_num, + cudaMemcpyHostToDevice); + return 0; } template -nvinfer1::DimsExprs 
EmbEltwiseLayernormPluginDynamic::getOutputDimensions( +void EmbEltwiseLayernormPluginDynamicImpl::terminate() { + for (int i = 0; i < embs_gpu_.size(); ++i) { + if (embs_gpu_[i]) { + cudaFree(embs_gpu_[i]); + embs_gpu_[i] = nullptr; + } + } + + if (bias_gpu_) { + cudaFree(bias_gpu_); + bias_gpu_ = nullptr; + } + + if (scale_gpu_) { + cudaFree(scale_gpu_); + scale_gpu_ = nullptr; + } +} + +template +int EmbEltwiseLayernormPluginDynamicImpl::enqueue( + const nvinfer1::PluginTensorDesc *input_desc, + const nvinfer1::PluginTensorDesc *output_desc, const void *const *inputs, + void *const *outputs, void *workspace, cudaStream_t stream) { + auto id_dims = input_desc[0].dims; + int batch = id_dims.d[0]; + int seq_len = id_dims.d[1]; + int input_num = embs_.size(); + + auto in_ptr_gpu_d = + in_ptr_tensor_.mutable_data(platform::CUDAPlace(device_id_)); + auto emb_ptr_gpu_d = + emb_ptr_tensor_.mutable_data(platform::CUDAPlace(device_id_)); + + auto new_input_ptr = reinterpret_cast(inputs[0]); + + if (old_input_ptr_ != new_input_ptr) { + old_input_ptr_ = new_input_ptr; + + cudaMemcpyAsync(in_ptr_gpu_d, reinterpret_cast(inputs), + sizeof(uintptr_t) * input_num, cudaMemcpyHostToDevice, + stream); + } + + auto out_type = output_desc[0].type; + + if (std::is_same::value) { + PADDLE_ENFORCE_EQ( + out_type == nvinfer1::DataType::kFLOAT, true, + platform::errors::InvalidArgument( + "The EmbEltwiseLayernorm Plugin only support fp32 input.")); + } else if (std::is_same::value) { + PADDLE_ENFORCE_EQ( + out_type == nvinfer1::DataType::kHALF, true, + platform::errors::InvalidArgument( + "The EmbEltwiseLayernorm Plugin only support fp16 input.")); + } else { + PADDLE_THROW(platform::errors::Fatal( + "Unsupport data type, the out type of EmbEltwiseLayernorm should be " + "float or half.")); + } + + auto *output_d = reinterpret_cast(outputs[0]); + + operators::math::EmbEltwiseLayerNormFunctor emb_eltwise_layernorm_func; + emb_eltwise_layernorm_func(batch, seq_len, hidden_size_, 
in_ptr_gpu_d, + scale_gpu_, bias_gpu_, emb_ptr_gpu_d, output_d, + eps_, input_num, stream); + return cudaGetLastError() != cudaSuccess; +} + +template class EmbEltwiseLayernormPluginDynamicImpl; +#ifdef SUPPORTS_CUDA_FP16 +template class EmbEltwiseLayernormPluginDynamicImpl; +#endif // SUPPORTS_CUDA_FP16 + +int EmbEltwiseLayernormPluginDynamic::initialize() { + impl_->initialize(); + + return 0; +} + +void EmbEltwiseLayernormPluginDynamic::terminate() { impl_->terminate(); } + +nvinfer1::DimsExprs EmbEltwiseLayernormPluginDynamic::getOutputDimensions( int output_index, const nvinfer1::DimsExprs *inputs, int nb_inputs, nvinfer1::IExprBuilder &expr_builder) { // NOLINT PADDLE_ENFORCE_EQ(output_index, 0, @@ -76,18 +191,7 @@ nvinfer1::DimsExprs EmbEltwiseLayernormPluginDynamic::getOutputDimensions( return ret; } -template -void EmbEltwiseLayernormPluginDynamic::terminate() { - for (auto ptr : embs_gpu_) { - if (ptr) cudaFree(ptr); - } - - if (bias_gpu_) cudaFree(bias_gpu_); - if (scale_gpu_) cudaFree(scale_gpu_); -} - -template -bool EmbEltwiseLayernormPluginDynamic::supportsFormatCombination( +bool EmbEltwiseLayernormPluginDynamic::supportsFormatCombination( int pos, const nvinfer1::PluginTensorDesc *in_out, int nb_inputs, int nb_outputs) { PADDLE_ENFORCE_NOT_NULL( @@ -98,6 +202,11 @@ bool EmbEltwiseLayernormPluginDynamic::supportsFormatCombination( "The EmbEltwiseLayerNorm's output should be one" "but it's (%d) outputs.", nb_outputs)); + PADDLE_ENFORCE_EQ(nb_outputs, 1, + platform::errors::InvalidArgument( + "The EmbEltwiseLayerNorm's output should be one" + "but it's (%d) outputs.", + nb_outputs)); PADDLE_ENFORCE_LT( pos, nb_inputs + nb_outputs, platform::errors::InvalidArgument("The pos(%d) should be less than the " @@ -122,7 +231,7 @@ bool EmbEltwiseLayernormPluginDynamic::supportsFormatCombination( } if (pos == all_nums - 1) { - if (sizeof(T) == sizeof(float)) { + if (with_fp16_ == false) { return desc.type == nvinfer1::DataType::kFLOAT; } else { return desc.type 
== nvinfer1::DataType::kHALF; @@ -131,84 +240,27 @@ bool EmbEltwiseLayernormPluginDynamic::supportsFormatCombination( return false; } -template -nvinfer1::DataType EmbEltwiseLayernormPluginDynamic::getOutputDataType( +nvinfer1::DataType EmbEltwiseLayernormPluginDynamic::getOutputDataType( int index, const nvinfer1::DataType *input_types, int nb_inputs) const { PADDLE_ENFORCE_EQ( index, 0, platform::errors::InvalidArgument( "The EmbEltwiseLayernorm Plugin only has one input, so the " "index value should be 0, but get %d.", index)); - return nvinfer1::DataType::kFLOAT; + if (with_fp16_) + return nvinfer1::DataType::kHALF; + else + return nvinfer1::DataType::kFLOAT; } -template -int EmbEltwiseLayernormPluginDynamic::enqueue( +int EmbEltwiseLayernormPluginDynamic::enqueue( const nvinfer1::PluginTensorDesc *input_desc, const nvinfer1::PluginTensorDesc *output_desc, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) { - auto id_dims = input_desc[0].dims; - int batch = id_dims.d[0]; - int seq_len = id_dims.d[1]; - int input_num = embs_.size(); - - framework::Tensor in_ptr_tensor, emb_ptr_tensor; - int device_id; - cudaGetDevice(&device_id); - - in_ptr_tensor.Resize({input_num}); - emb_ptr_tensor.Resize({input_num}); - int64_t *in_ptr_gpu_d = - in_ptr_tensor.mutable_data(platform::CUDAPlace(device_id)); - int64_t *emb_ptr_gpu_d = - emb_ptr_tensor.mutable_data(platform::CUDAPlace(device_id)); - - std::vector in_ptr, emb_ptr; - for (int i = 0; i < input_num; i++) { - in_ptr.push_back(reinterpret_cast(inputs[i])); - emb_ptr.push_back(reinterpret_cast(embs_gpu_[i])); - } - - cudaMemcpyAsync(in_ptr_gpu_d, in_ptr.data(), sizeof(int64_t) * input_num, - cudaMemcpyHostToDevice, stream); - cudaMemcpyAsync(emb_ptr_gpu_d, emb_ptr.data(), sizeof(int64_t) * input_num, - cudaMemcpyHostToDevice, stream); - - auto out_type = output_desc[0].type; - - const unsigned tpb = 256; - const dim3 grid(seq_len, batch, 1); - const dim3 block(tpb, 1, 1); - if 
(sizeof(T) == sizeof(float)) { - PADDLE_ENFORCE_EQ( - out_type == nvinfer1::DataType::kFLOAT, true, - platform::errors::InvalidArgument( - "The EmbEltwiseLayernorm Plugin only support fp32 input.")); - } else if (sizeof(T) == sizeof(int16_t)) { - PADDLE_ENFORCE_EQ( - out_type == nvinfer1::DataType::kHALF, true, - platform::errors::InvalidArgument( - "The EmbEltwiseLayernorm Plugin only support fp16 input.")); - } else { - PADDLE_THROW(platform::errors::Fatal( - "Unsupport data type, the out type of EmbEltwiseLayernorm should be " - "float or half.")); - } - - T *output_d = static_cast(outputs[0]); - - operators::math::EmbEltwiseLayerNormFunctor emb_eltwise_layernorm_func; - emb_eltwise_layernorm_func(batch, seq_len, hidden_size_, in_ptr_gpu_d, - scale_gpu_, bias_gpu_, emb_ptr_gpu_d, output_d, - eps_, input_num, stream); + impl_->enqueue(input_desc, output_desc, inputs, outputs, workspace, stream); return cudaGetLastError() != cudaSuccess; } -template class EmbEltwiseLayernormPluginDynamic; -#ifdef SUPPORTS_CUDA_FP16 -template class EmbEltwiseLayernormPluginDynamic; -#endif // SUPPORTS_CUDA_FP16 - #endif } // namespace plugin diff --git a/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h b/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h index 5babd87db06..24ca853104e 100644 --- a/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h @@ -27,14 +27,76 @@ namespace tensorrt { namespace plugin { #if IS_TRT_VERSION_GE(6000) + +class EmbEltwiseLayernormPluginDynamicImplBase { + public: + EmbEltwiseLayernormPluginDynamicImplBase() {} + virtual ~EmbEltwiseLayernormPluginDynamicImplBase() {} + + virtual int initialize() = 0; + virtual void terminate() = 0; + virtual int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, + const nvinfer1::PluginTensorDesc* outputDesc, + const void* const* inputs, void* const* outputs, + void* workspace, 
cudaStream_t stream) = 0; +}; + template +class EmbEltwiseLayernormPluginDynamicImpl + : public EmbEltwiseLayernormPluginDynamicImplBase { + public: + explicit EmbEltwiseLayernormPluginDynamicImpl(std::vector input_embs, + float* bias, float* scale, + std::vector emb_sizes, + int bias_size, int scale_size, + int hidden_size, float eps) + : embs_(input_embs), + bias_(bias), + scale_(scale), + emb_sizes_(emb_sizes), + bias_size_(bias_size), + scale_size_(scale_size), + hidden_size_(hidden_size), + eps_(eps) {} + + ~EmbEltwiseLayernormPluginDynamicImpl(); + + int initialize(); + void terminate(); + int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, + const nvinfer1::PluginTensorDesc* outputDesc, + const void* const* inputs, void* const* outputs, void* workspace, + cudaStream_t stream); + + private: + std::vector embs_; + float* bias_{nullptr}; + float* scale_{nullptr}; + + // data on devices + float* bias_gpu_{nullptr}; + float* scale_gpu_{nullptr}; + std::vector embs_gpu_; + + std::vector emb_sizes_; + int bias_size_; + int scale_size_; + int hidden_size_; + float eps_; + + framework::Tensor in_ptr_tensor_, emb_ptr_tensor_; + int device_id_{0}; + uintptr_t old_input_ptr_{0}; +}; + class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT { public: explicit EmbEltwiseLayernormPluginDynamic(std::vector input_embs, float* bias, float* scale, std::vector emb_sizes, int bias_size, int scale_size, - int hidden_size, float eps) + int hidden_size, float eps, + bool with_fp16) : embs_(input_embs), bias_(bias), scale_(scale), @@ -42,51 +104,81 @@ class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT { bias_size_(bias_size), scale_size_(scale_size), hidden_size_(hidden_size), - eps_(eps) {} + eps_(eps), + with_fp16_(with_fp16), + own_host_buff_(false) { + if (with_fp16) { +#ifdef SUPPORTS_CUDA_FP16 + impl_ = new EmbEltwiseLayernormPluginDynamicImpl( + embs_, bias_, scale_, emb_sizes_, bias_size_, scale_size_, + hidden_size_, eps_); +#else + 
PADDLE_THROW(platform::errors::Fatal( + "Unsupported data type, current GPU doesn't support half.")); +#endif // SUPPORTS_CUDA_FP16 + } else { + impl_ = new EmbEltwiseLayernormPluginDynamicImpl( + embs_, bias_, scale_, emb_sizes_, bias_size_, scale_size_, + hidden_size_, eps_); + } + } EmbEltwiseLayernormPluginDynamic(void const* serial_data, - size_t serial_length) { + size_t serial_length) + : own_host_buff_(true) { DeserializeValue(&serial_data, &serial_length, &emb_sizes_); - embs_gpu_.resize(emb_sizes_.size()); embs_.resize(emb_sizes_.size()); for (size_t i = 0; i < emb_sizes_.size(); i++) { - cudaMalloc(&embs_gpu_[i], sizeof(float) * emb_sizes_[i]); - cudaMemcpy(embs_gpu_[i], serial_data, emb_sizes_[i] * sizeof(float), - cudaMemcpyHostToDevice); + auto size = emb_sizes_[i]; + auto ptr = new float[size]; + memcpy(ptr, serial_data, sizeof(float) * size); + embs_[i] = ptr; reinterpret_cast(serial_data) += emb_sizes_[i] * sizeof(float); serial_length -= emb_sizes_[i] * sizeof(float); - embs_[i] = nullptr; } DeserializeValue(&serial_data, &serial_length, &bias_size_); DeserializeValue(&serial_data, &serial_length, &scale_size_); - cudaMalloc(&bias_gpu_, sizeof(float) * bias_size_); - cudaMemcpy(bias_gpu_, serial_data, bias_size_ * sizeof(float), - cudaMemcpyHostToDevice); - bias_ = nullptr; + if (bias_size_) { + bias_ = new float[bias_size_]; + memcpy(bias_, serial_data, sizeof(float) * bias_size_); + } reinterpret_cast(serial_data) += bias_size_ * sizeof(float); serial_length -= bias_size_ * sizeof(float); - cudaMalloc(&scale_gpu_, sizeof(float) * scale_size_); - cudaMemcpy(scale_gpu_, serial_data, scale_size_ * sizeof(float), - cudaMemcpyHostToDevice); - scale_ = nullptr; + if (scale_size_) { + scale_ = new float[scale_size_]; + memcpy(scale_, serial_data, sizeof(float) * scale_size_); + } reinterpret_cast(serial_data) += scale_size_ * sizeof(float); serial_length -= scale_size_ * sizeof(float); DeserializeValue(&serial_data, &serial_length, &hidden_size_); 
DeserializeValue(&serial_data, &serial_length, &eps_); + DeserializeValue(&serial_data, &serial_length, &with_fp16_); + + if (with_fp16_) { +#ifdef SUPPORTS_CUDA_FP16 + impl_ = new EmbEltwiseLayernormPluginDynamicImpl( + embs_, bias_, scale_, emb_sizes_, bias_size_, scale_size_, + hidden_size_, eps_); +#else + PADDLE_THROW(platform::errors::Fatal( + "Unsupported data type, current GPU doesn't support half.")); +#endif // SUPPORTS_CUDA_FP16 + } else { + impl_ = new EmbEltwiseLayernormPluginDynamicImpl( + embs_, bias_, scale_, emb_sizes_, bias_size_, scale_size_, + hidden_size_, eps_); + } } nvinfer1::IPluginV2DynamicExt* clone() const override { auto ptr = new EmbEltwiseLayernormPluginDynamic( embs_, bias_, scale_, emb_sizes_, bias_size_, scale_size_, hidden_size_, - eps_); - ptr->embs_gpu_ = embs_gpu_; - ptr->bias_gpu_ = bias_gpu_; - ptr->scale_gpu_ = scale_gpu_; + eps_, with_fp16_); return ptr; } @@ -95,6 +187,7 @@ class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT { } int getNbOutputs() const override { return 1; } int initialize() override; + void terminate() override; size_t getSerializationSize() const override { int sum_num = 0; @@ -110,24 +203,32 @@ class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT { sum_num += (bias_size_ + scale_size_) * sizeof(float); sum_num += SerializedSize(hidden_size_); sum_num += SerializedSize(eps_); - // sum_num += SerializedSize(with_fp16_); + sum_num += SerializedSize(with_fp16_); return sum_num; } - void terminate() override; void serialize(void* buffer) const override { - // SerializeValue(&buffer, with_fp16_); SerializeValue(&buffer, emb_sizes_); for (size_t i = 0; i < emb_sizes_.size(); i++) { - SerializeCudaPointer(&buffer, embs_gpu_[i], emb_sizes_[i]); + auto size = emb_sizes_[i]; + for (int j = 0; j < size; ++j) { + SerializeValue(&buffer, embs_[i][j]); + } } SerializeValue(&buffer, bias_size_); SerializeValue(&buffer, scale_size_); - SerializeCudaPointer(&buffer, bias_gpu_, 
bias_size_); - SerializeCudaPointer(&buffer, scale_gpu_, scale_size_); + for (int i = 0; i < bias_size_; ++i) { + SerializeValue(&buffer, bias_[i]); + } + + for (int i = 0; i < scale_size_; ++i) { + SerializeValue(&buffer, scale_[i]); + } + SerializeValue(&buffer, hidden_size_); SerializeValue(&buffer, eps_); + SerializeValue(&buffer, with_fp16_); } nvinfer1::DimsExprs getOutputDimensions( @@ -158,23 +259,33 @@ class EmbEltwiseLayernormPluginDynamic : public DynamicPluginTensorRT { const nvinfer1::DataType* input_types, int nb_inputs) const override; - void destroy() override { delete this; } + void destroy() override { + if (own_host_buff_) { + for (auto ptr : embs_) { + delete[] ptr; + } + delete[] bias_; + delete[] scale_; + } + + delete impl_; + delete this; + } private: std::vector embs_; float* bias_; float* scale_; - // data on devices - float* bias_gpu_; - float* scale_gpu_; - std::vector embs_gpu_; - std::vector emb_sizes_; int bias_size_; int scale_size_; int hidden_size_; float eps_; + + bool with_fp16_; + bool own_host_buff_{false}; + EmbEltwiseLayernormPluginDynamicImplBase* impl_{nullptr}; }; class EmbEltwiseLayernormPluginV2Creator : public nvinfer1::IPluginCreator { @@ -198,8 +309,7 @@ class EmbEltwiseLayernormPluginV2Creator : public nvinfer1::IPluginCreator { nvinfer1::IPluginV2* deserializePlugin(const char* name, const void* serial_data, size_t serial_length) override { - return new EmbEltwiseLayernormPluginDynamic(serial_data, - serial_length); + return new EmbEltwiseLayernormPluginDynamic(serial_data, serial_length); } void setPluginNamespace(const char* lib_namespace) override { diff --git a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_deserialize_test.cc b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_deserialize_test.cc index 685f7b6600e..d49f83b9d38 100644 --- a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_deserialize_test.cc +++ 
b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_deserialize_test.cc @@ -151,7 +151,7 @@ void trt_ernie(bool with_fp16, std::vector result) { run(config, &out_data); // serialize run(*config_deser, &out_data); // deserialize for (size_t i = 0; i < out_data.size(); i++) { - EXPECT_NEAR(result[i], out_data[i], 1e-6); + EXPECT_NEAR(result[i], out_data[i], 1e-2); } } @@ -159,13 +159,11 @@ TEST(AnalysisPredictor, no_fp16) { std::vector result = {0.597841, 0.219972, 0.182187}; trt_ernie(false, result); } - -TEST(AnalysisPredictor, fp16) { #ifdef SUPPORTS_CUDA_FP16 - std::vector result = {0.598336, 0.219558, 0.182106}; +TEST(AnalysisPredictor, fp16) { + std::vector result = {0.59923654, 0.21923761, 0.18152587}; trt_ernie(true, result); -#endif } - +#endif // SUPPORTS_CUDA_FP16 } // namespace inference } // namespace paddle -- GitLab From 99626502f747cc85d518d87267cec821ffbf69a3 Mon Sep 17 00:00:00 2001 From: tangwei12 Date: Fri, 18 Sep 2020 22:32:28 +0800 Subject: [PATCH 009/117] =?UTF-8?q?=E3=80=90paddle.fleet=E3=80=91gloo=20an?= =?UTF-8?q?d=20util=20(#27213)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix worker endpoints * fix gloo wrapper for hdfs * GPU fleetrun support gloo * parameterserver fleetrun support gloo * fix get server endpoint --- paddle/fluid/framework/fleet/gloo_wrapper.cc | 25 +- python/paddle/distributed/fleet/__init__.py | 1 + .../distributed/fleet/base/fleet_base.py | 22 +- .../distributed/fleet/base/role_maker.py | 592 ++++++++++++------ .../distributed/fleet/base/util_factory.py | 39 +- python/paddle/distributed/fleet/launch.py | 26 +- .../paddle/distributed/fleet/launch_utils.py | 10 +- .../fluid/tests/unittests/test_fleet_base.py | 19 +- .../unittests/test_fleet_rolemaker_new.py | 283 ++++++++- .../fluid/tests/unittests/test_fleet_util.py | 97 +-- 10 files changed, 749 insertions(+), 365 deletions(-) diff --git a/paddle/fluid/framework/fleet/gloo_wrapper.cc 
b/paddle/fluid/framework/fleet/gloo_wrapper.cc index bb958f1ac01..f195dde4084 100644 --- a/paddle/fluid/framework/fleet/gloo_wrapper.cc +++ b/paddle/fluid/framework/fleet/gloo_wrapper.cc @@ -19,6 +19,8 @@ limitations under the License. */ namespace gloo { namespace rendezvous { +constexpr int kNodeSize = 136; + HdfsStore::HdfsStore(const std::string& path) { path_ = path; wait_sleep_ms_ = 10000; @@ -213,12 +215,14 @@ void ParallelConnectContext::connectFullMesh( storeKey << rank; store.set(storeKey.str(), allBytes); + auto total_add_size = kNodeSize * (size - 1); + std::vector> connect_threads(thread_num_); // Connect every pair for (uint32_t i = 0; i < connect_threads.size(); ++i) { connect_threads[i].reset(new std::thread( - [&store, &transportContext, this](size_t thread_idx, - size_t thread_num) -> void { + [&store, &transportContext, total_add_size, this]( + size_t thread_idx, size_t thread_num) -> void { for (int i = thread_idx; i < size; i += thread_num) { if (i == rank) { continue; @@ -226,8 +230,23 @@ void ParallelConnectContext::connectFullMesh( // Wait for address of other side of this pair to become available std::string key = std::to_string(i); store.wait({key}, getTimeout()); + + std::vector allAddrs; + auto max_retry_times = 5; // Connect to other side of this pair - auto allAddrs = store.get(key); + + while (max_retry_times > 0) { + allAddrs = store.get(key); + + VLOG(3) << "store get all address size: " << allAddrs.size() + << " except: " << total_add_size; + if (allAddrs.size() == static_cast(total_add_size)) { + break; + } + + --max_retry_times; + } + auto addr = extractAddress(allAddrs, i); transportContext->getPair(i)->connect(addr); } diff --git a/python/paddle/distributed/fleet/__init__.py b/python/paddle/distributed/fleet/__init__.py index 2539fa57a34..f3ee09a6d9e 100644 --- a/python/paddle/distributed/fleet/__init__.py +++ b/python/paddle/distributed/fleet/__init__.py @@ -39,6 +39,7 @@ server_num = fleet.server_num server_index = 
fleet.server_index server_endpoints = fleet.server_endpoints is_server = fleet.is_server +set_util = fleet.set_util util = fleet.util barrier_worker = fleet.barrier_worker init_worker = fleet.init_worker diff --git a/python/paddle/distributed/fleet/base/fleet_base.py b/python/paddle/distributed/fleet/base/fleet_base.py index 805c2d1fc73..aeb8cac98e2 100644 --- a/python/paddle/distributed/fleet/base/fleet_base.py +++ b/python/paddle/distributed/fleet/base/fleet_base.py @@ -180,6 +180,8 @@ class Fleet(object): raise ValueError( "`role_maker` should be subclass of `RoleMakerBase`, but got {}". format(type(role_maker))) + self._role_maker.generate_role() + self.strategy_compiler = StrategyCompiler() if paddle.fluid.framework.in_dygraph_mode(): if parallel_helper._is_parallel_ctx_initialized(): @@ -187,7 +189,6 @@ class Fleet(object): "The dygraph parallel environment has been initialized.") else: paddle.distributed.init_parallel_env() - return None def is_first_worker(self): """ @@ -275,13 +276,10 @@ class Fleet(object): fleet.worker_endpoints() """ - ''' if to_string: return ",".join(self._role_maker.get_trainer_endpoints()) else: return self._role_maker.get_trainer_endpoints() - ''' - return ["127.0.0.1:1001", "127.0.0.1:1002"] def server_num(self): """ @@ -355,7 +353,9 @@ class Fleet(object): return self._role_maker.is_server( ) or self._role_maker._is_heter_worker() - @property + def set_util(self, util): + self._util = util + def util(self): """ Utility functions that can be used under certain runtime @@ -376,16 +376,6 @@ class Fleet(object): """ return self._util - @util.setter - def util(self, util): - """ - Set Utility functions for userd-defined runtime - - Returns: - None - """ - self._util = util - def barrier_worker(self): """ barrier all workers @@ -393,7 +383,7 @@ class Fleet(object): Returns: None """ - self._role_maker.barrier_worker() + self._role_maker._barrier("worker") @is_non_distributed_check @inited_runtime_handler diff --git 
a/python/paddle/distributed/fleet/base/role_maker.py b/python/paddle/distributed/fleet/base/role_maker.py index a3a809ee375..d36c06047f5 100644 --- a/python/paddle/distributed/fleet/base/role_maker.py +++ b/python/paddle/distributed/fleet/base/role_maker.py @@ -13,18 +13,332 @@ # limitations under the License. """Defination of Role Makers.""" import os +import time import numpy as np import warnings from multiprocessing import Process, Manager -import paddle.fluid as fluid -#__all__ = ['UserDefinedRoleMaker', 'PaddleCloudRoleMaker'] +import paddle.fluid as fluid class Role: WORKER = 1 SERVER = 2 HETER_WORKER = 3 + ALL = 4 + + +class Gloo(object): + """ + Gloo is a universal class for barrier and collective communication + """ + + class RENDEZVOUS: + HDFS = 1 + FILE = 2 + HTTP = 3 + + def __init__(self): + self._worker_comm = None + self._server_comm = None + self._nodes_comm = None + + self._comm_world = ["worker", "server", "all"] + self._err_init = "gloo is not initialized, will not communicator with other nodes" + self._err_type = "gloo initialized error, please check arguments" + self._err_world = "argument error, comm_world must in {}".format( + self._comm_world) + + self._is_initialized = False + self._init_timeout_seconds = 3600 + self._run_timeout_seconds = 9999999 + + self._rendezvous = None + self._role = None + self._iface = None + + self._role_id = -1 + self._worker_num = -1 + self._server_num = -1 + self._need_init_all = False + + def init(self, + rendezvous, + role, + role_id, + worker_num, + server_num, + need_init_all=False, + kwargs=None): + + self._rendezvous = rendezvous + self._role = role + self._role_id = role_id + self._worker_num = worker_num + self._server_num = server_num + self._need_init_all = need_init_all + self._iface = self.__get_default_iface() + self._prefix = kwargs.get("store.prefix", "") + + if self._rendezvous == Gloo.RENDEZVOUS.HDFS: + dfs_name = kwargs.get("dfs.name", "") + dfs_ugi = kwargs.get("dfs.ugi", "") + dfs_path = 
kwargs.get("dfs.path", "") + + if not dfs_name or not dfs_ugi or not dfs_path: + raise ValueError(self._err_type) + self._init_dfs(dfs_name, dfs_ugi, dfs_path, self._prefix) + + elif self._rendezvous == Gloo.RENDEZVOUS.FILE: + fs_path = kwargs.get("dfs.path", "") + + if not fs_path: + raise ValueError(self._err_type) + self._init_fs(fs_path, self._prefix) + + elif self._rendezvous == Gloo.RENDEZVOUS.HTTP: + ip = kwargs.get("http.host", "") + port = kwargs.get("http.port", "") + + if not ip or not port: + raise ValueError(self._err_type) + self._init_http(ip, port, self._prefix) + + else: + raise ValueError(self._err_type) + + self._is_initialized = True + + def _init_fs(self, fs_path, prefix): + def init(rank, nodes, role): + gloo = fluid.core.Gloo() + gloo.set_rank(rank) + gloo.set_size(nodes) + gloo.set_prefix(prefix) + gloo.set_iface(self._iface) + gloo.set_timeout_seconds(self._init_timeout_seconds, + self._run_timeout_seconds) + gloo.set_hdfs_store(os.path.join(fs_path, role), "", "") + gloo.init() + return gloo + + if self._role == Role.WORKER: + rank, nodes = self._get_rank_nodes(Role.WORKER) + gloo = init(rank, nodes, "WORKER") + self._worker_comm = gloo + else: + rank, nodes = self._get_rank_nodes(Role.SERVER) + gloo = init(rank, nodes, "SERVER") + self._server_comm = gloo + + if self._need_init_all: + rank, nodes = self._get_rank_nodes(Role.ALL) + gloo = init(rank, nodes, "ALL") + self._nodes_comm = gloo + + def _init_dfs(self, dfs_name, dfs_ugi, dfs_path, prefix): + def init(rank, nodes, role): + gloo = fluid.core.Gloo() + gloo.set_rank(rank) + gloo.set_size(nodes) + gloo.set_prefix(prefix) + gloo.set_iface(self._iface) + gloo.set_timeout_seconds(self._init_timeout_seconds, + self._run_timeout_seconds) + gloo.set_hdfs_store(os.path.join(dfs_path, role), dfs_name, dfs_ugi) + gloo.init() + return gloo + + if self._role == Role.WORKER: + rank, nodes = self._get_rank_nodes(Role.WORKER) + gloo = init(rank, nodes, "WORKER") + self._worker_comm = gloo + else: + 
rank, nodes = self._get_rank_nodes(Role.SERVER) + gloo = init(rank, nodes, "SERVER") + self._server_comm = gloo + + if self._need_init_all: + rank, nodes = self._get_rank_nodes(Role.ALL) + gloo = init(rank, nodes, "ALL") + self._nodes_comm = gloo + + def _init_http(self, ip, port, prefix): + def __start_kv_server(http_server_d, size_d): + from paddle.distributed.fleet.utils.http_server import KVServer + http_server = KVServer(port, size_d) + http_server.start() + wait_seconds = 5 + while http_server_d.get("running", + False) and not http_server.shoud_stop(): + time.sleep(wait_seconds) + http_server.stop() + + def init_kv_server(): + size_d = { + "trainer": self._worker_num, + "pserver": self._server_num, + "all": self._worker_num + self._server_num + } + + _http_server_d = {"running": True} + # child process for http server + _http_server = Process( + target=__start_kv_server, args=(_http_server_d, size_d)) + _http_server.daemon = True + # set running status to True + # start child process + _http_server.start() + + def init(rank, nodes, role): + gloo = fluid.core.Gloo() + gloo.set_rank(rank) + gloo.set_size(nodes) + gloo.set_prefix(prefix) + gloo.set_iface(self._iface) + gloo.set_timeout_seconds(self._init_timeout_seconds, + self._run_timeout_seconds) + gloo.set_http_store(ip, port, role) + return gloo + + port = int(port) + + if self._role == Role.SERVER and self._role_id == 0: + init_kv_server() + + if self._role == Role.WORKER: + rank, nodes = self._get_rank_nodes(Role.WORKER) + gloo = init(rank, nodes, "WORKER") + self._worker_comm = gloo + else: + rank, nodes = self._get_rank_nodes(Role.SERVER) + gloo = init(rank, nodes, "SERVER") + self._server_comm = gloo + + if self._need_init_all: + rank, nodes = self._get_rank_nodes(Role.ALL) + gloo = init(rank, nodes, "ALL") + self._nodes_comm = gloo + + def _get_rank_nodes(self, role): + nodes = 0 + rank = -1 + + if role == Role.WORKER: + nodes = self._worker_num + rank = self._role_id + elif role == Role.SERVER: + 
nodes = self._server_num + rank = self._role_id + elif role == Role.ALL: + nodes = self._worker_num + self._server_num + + if self._role == Role.WORKER: + rank = self._role_id + else: + rank = self._worker_num + self._role_id + else: + ValueError(self._err_type) + + return rank, nodes + + def __get_default_iface(self): + """ + get default physical interface + """ + default1 = self.__get_default_iface_from_gateway() + default2 = self.__get_default_iface_from_interfaces() + return default2 if default1 == "lo" else default1 + + def __get_default_iface_from_gateway(self): + """ + get default physical interface + """ + import netifaces + gateways = netifaces.gateways() + if gateways.get(netifaces.AF_INET) != None: + gateway = gateways[netifaces.AF_INET] + if len(gateway) > 0 and len(gateway[0]) > 1: + return gateway[0][1] + return "lo" + + def __get_default_iface_from_interfaces(self): + """ + get default physical interface + """ + import netifaces + for intf_name in netifaces.interfaces(): + addresses = netifaces.ifaddresses(intf_name) + if netifaces.AF_INET in addresses: + ipv4_addresses = addresses[netifaces.AF_INET] + for ipv4_address in ipv4_addresses: + if 'broadcast' in ipv4_address: + return intf_name + return "lo" + + def barrier(self, comm_world): + """ + dummy barrier, do nothing + """ + if not self._is_initialized: + warnings.warn(self._err_init) + return + + if comm_world not in self._comm_world: + raise ValueError(self._err_world) + + if comm_world == "worker": + self._worker_comm.barrier() + elif comm_world == "server": + self._server_comm.barrier() + else: + self._nodes_comm.barrier() + + def all_reduce(self, input, mode="sum", comm_world="worker"): + if not self._is_initialized: + warnings.warn(self._err_init) + return input + + if comm_world not in self._comm_world: + raise ValueError(self._err_world) + + input = np.array(input) + input_shape = input.shape + input_list = input.reshape(-1).tolist() + + self.barrier(comm_world) + + if comm_world == 
"worker": + ans = self._worker_comm.all_reduce(input_list, mode) + elif comm_world == "server": + ans = self._server_comm.all_reduce(input_list, mode) + else: + ans = self._nodes_comm.all_reduce(input_list, mode) + + output = np.array(ans).reshape(input_shape) + return output + + def all_gather(self, input, comm_world="worker"): + """ + dummy all gather, do nothing + Args: + obj(any): obj to do all gather + """ + if not self._is_initialized: + warnings.warn(self._err_init) + return input + + if comm_world not in self._comm_world: + raise ValueError(self._err_world) + + if comm_world == "worker": + output = self._worker_comm.all_gather(input) + elif comm_world == "server": + output = self._server_comm.all_gather(input) + else: + output = self._nodes_comm.all_gather(input) + + return output class RoleMakerBase(object): @@ -47,10 +361,6 @@ class RoleMakerBase(object): self._heter_trainer_device = "CPU" self._is_heter_parameter_server_mode = False - self._node_type = None - self._node_type_comm = None - self._all_comm = None - def is_worker(self): """ return is_worker() of current process @@ -142,19 +452,11 @@ class RoleMakerBase(object): self._role, self._current_id, self._worker_endpoints, self._server_endpoints) - def _all_gather(self, comm_world, input): - """ - - Args: - input(int|float): input value - - Returns: - return a list of values - """ - print("warning: RoleMakerBase does not have all gather.") + def _all_gather(self, input, comm_world="worker"): + print("warning: RoleMakerBase does not have all gather worker.") return None - def _all_reduce(self, comm_world, input, mode="sum"): + def _all_reduce(self, input, mode="sum", comm_world="worker"): """ Args: input(list/numpy.array): array of one dim @@ -221,73 +523,25 @@ class PaddleCloudRoleMaker(RoleMakerBase): def __init__(self, is_collective=False, **kwargs): super(PaddleCloudRoleMaker, self).__init__() self._is_collective = is_collective - self._init_gloo = False # default no init gloo - self._kwargs = 
kwargs + self._non_distributed = False + + self._kwargs = kwargs self._role_is_generated = False self._server_endpoints = None self._worker_endpoints = None - self._node_type_comm = None - self._all_comm = None - - self._non_distributed = False - - if not self._is_collective: - self._hdfs_name = kwargs.get("hdfs_name", "") - self._hdfs_ugi = kwargs.get("hdfs_ugi", "") - self._hdfs_path = kwargs.get("path", "").rstrip("/") - self._init_timeout_seconds = kwargs.get("init_timeout_seconds", - 3600) - self._run_timeout_seconds = kwargs.get("run_timeout_seconds", - 9999999) - ip_port = kwargs.get("http_ip_port", "") - self._http_ip_port = [] - self._http_server = None - # if ip_port is not empty, it will use http instead of hdfs - if ip_port != "": - self._http_ip_port = ip_port.split(":") - # it's for communication between processes - self._manager = Manager() - # global dict to store status - self._http_server_d = self._manager.dict() - # set running status of http server - self._http_server_d["running"] = False - self._iface = self.__get_default_iface() - # this environment variable can be empty - self._prefix = os.getenv("SYS_JOB_ID", "") + self._gloo = Gloo() # gloo instance def _barrier(self, comm_world): - if isinstance(comm_world, fluid.core.Gloo): - comm_world.barrier() - else: - print("warning: must init Gloo before using _barrier() function") - - def _all_gather(self, comm_world, input): - if isinstance(comm_world, fluid.core.Gloo): - self._barrier(comm_world) - output = comm_world.all_gather(input) - return output - else: - print("warning: must init Gloo before using _all_gather() function") - return None - - def _all_reduce(self, comm_world, input, mode="sum"): - if isinstance(comm_world, fluid.core.Gloo): - - input = np.array(input) + self._gloo.barrier(comm_world) - input_shape = input.shape - input_list = input.reshape(-1).tolist() + def _all_gather(self, input, comm_world="worker"): + return self._gloo.all_gather(input, comm_world) - 
self._barrier(comm_world) - ans = comm_world.all_reduce(input_list, mode) - output = np.array(ans).reshape(input_shape) - return output - else: - print("warning: must init Gloo before using _all_reduce() function") - return None + def _all_reduce(self, input, mode="sum", comm_world="worker"): + return self._gloo.all_reduce(input, mode, comm_world) def is_worker(self): """ @@ -349,7 +603,7 @@ class PaddleCloudRoleMaker(RoleMakerBase): """ if not self._role_is_generated: self.generate_role() - return self._trainers_num + return len(self.get_pserver_endpoints()) def node_num(self): """ @@ -421,8 +675,7 @@ class PaddleCloudRoleMaker(RoleMakerBase): # Environment variable PADDLE_PSERVERS_IP_PORT_LIST must be set # format: string(ip:port,ip:port), eg. 127.0.0.1:6001,127.0.0.1:6002 self._server_endpoints = os.getenv("PADDLE_PSERVERS_IP_PORT_LIST") - self._worker_endpoints = os.getenv("PADDLE_TRAINER_ENDPOINTS", - "").split(",") + if self._server_endpoints is None: # back to non_distributed execution. 
self._server_endpoints = "" @@ -436,6 +689,13 @@ class PaddleCloudRoleMaker(RoleMakerBase): return self._server_endpoints = self._server_endpoints.split(",") + + self._worker_endpoints = os.getenv("PADDLE_TRAINER_ENDPOINTS") + if self._worker_endpoints: + self._worker_endpoints = self._worker_endpoints.split(",") + else: + self._worker_endpoints = [] + trainers_num = int(os.environ["PADDLE_TRAINERS_NUM"]) training_role = os.environ["TRAINING_ROLE"] @@ -506,6 +766,7 @@ class PaddleCloudRoleMaker(RoleMakerBase): self._current_id = int(os.getenv("PADDLE_TRAINER_ID", "0")) self._training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") assert (self._training_role == "TRAINER") + self._role = Role.WORKER self._worker_endpoints = os.getenv("PADDLE_TRAINER_ENDPOINTS") self._cur_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT") if self._worker_endpoints is None: @@ -518,74 +779,64 @@ class PaddleCloudRoleMaker(RoleMakerBase): self._node_num = len( set([x.split(':')[0] for x in self._worker_endpoints])) - def _init_gloo_env(self): - def init_gloo_instance(role="trainer"): - role = role.lower() - assert role in ["trainer", "pserver", "all"] - if role == "trainer": - all_list = self._worker_endpoints - rank = self._current_id - elif role == "pserver": - all_list = self._server_endpoints - rank = self._current_id - else: - all_list = self._worker_endpoints + self._server_endpoints - rank = all_list.index(self._cur_endpoint) - gloo = fluid.core.Gloo() - gloo.set_rank(rank) - gloo.set_size(len(all_list)) - gloo.set_prefix(self._prefix) - gloo.set_iface(self._iface) - gloo.set_timeout_seconds(self._init_timeout_seconds, - self._run_timeout_seconds) - if len(self._http_ip_port) != 0: - gloo.set_http_store(self._http_ip_port[0], - int(self._http_ip_port[1]), role) - else: - gloo.set_hdfs_store(self._hdfs_path + "/" + role, - self._hdfs_name, self._hdfs_ugi) - gloo.init() - return gloo - - # paddlecloud support gloo - if self._role == Role.WORKER: - if self._current_id == 0 and 
len(self._http_ip_port) != 0: - size_d = { - "trainer": len(self._worker_endpoints), - "pserver": len(self._server_endpoints), - "all": - len(self._worker_endpoints) + len(self._server_endpoints) - } - # child process for http server - self._http_server = Process( - target=self.__start_kv_server, - args=(self._http_server_d, size_d)) - self._http_server.daemon = True - # set running status to True - self._http_server_d["running"] = True - # start child process - self._http_server.start() - self._node_type = 1 - gloo = init_gloo_instance("trainer") - self._node_type_comm = gloo + def _gloo_init(self): + # PADDLE_WITH_GLOO 1: trainer barrier, 2: all barrier + use_gloo = int(os.getenv("PADDLE_WITH_GLOO", "0")) + if use_gloo not in [1, 2]: + return + + # PADDLE_GLOO_RENDEZVOUS 1: HDFS 2: FILE 3: HTTP + rendezvous_type = int(os.getenv("PADDLE_GLOO_RENDEZVOUS", "0")) + prefix = os.getenv("SYS_JOB_ID", "") + if rendezvous_type not in [ + Gloo.RENDEZVOUS.HDFS, Gloo.RENDEZVOUS.HTTP, Gloo.RENDEZVOUS.FILE + ]: + raise ValueError(self._gloo._err_type) + + need_init_all = True if use_gloo == 2 else False + + if rendezvous_type == Gloo.RENDEZVOUS.HDFS: + dfs_name = os.getenv("PADDLE_GLOO_FS_NAME", "") + dfs_ugi = os.getenv("PADDLE_GLOO_FS_UGI", "") + dfs_path = os.getenv("PADDLE_GLOO_FS_PATH", "") + kwargs = { + "dfs.name": dfs_name, + "dfs.ugi": dfs_ugi, + "dfs.path": dfs_path, + "store.prefix": prefix, + } + elif rendezvous_type == Gloo.RENDEZVOUS.HTTP: + ip = os.getenv("PADDLE_GLOO_HTTP_HOST", "") + port = os.getenv("PADDLE_GLOO_HTTP_PORT", "") + kwargs = { + "http.host": ip, + "http.port": port, + "store.prefix": prefix, + } else: - assert self._role == Role.SERVER - self._node_type = 0 - gloo = init_gloo_instance("pserver") - self._node_type_comm = gloo - - all_list = self._worker_endpoints + self._server_endpoints - self._rank = all_list.index(self._cur_endpoint) - self._size = len(all_list) - - gloo = init_gloo_instance("all") - self._all_comm = gloo - - if 
self._http_server is not None: - # set running status to False - self._http_server_d["running"] = False - # wait until child process exits - self._http_server.join() + dfs_path = os.getenv("PADDLE_GLOO_FS_PATH", "") + kwargs = { + "dfs.path": dfs_path, + "store.prefix": prefix, + } + + if rendezvous_type == Gloo.RENDEZVOUS.HDFS: + type = "HDFS" + elif rendezvous_type == Gloo.RENDEZVOUS.HTTP: + type = "HTTP" + else: + type = "FILE" + print("Gloo init with {}: need_init_all: {}, args: {}".format( + type, need_init_all, kwargs)) + + self._gloo.init( + rendezvous=rendezvous_type, + role=self._role, + role_id=self.role_id(), + worker_num=self.worker_num(), + server_num=self.server_num(), + need_init_all=need_init_all, + kwargs=kwargs) def generate_role(self): """ @@ -594,57 +845,10 @@ class PaddleCloudRoleMaker(RoleMakerBase): if not self._role_is_generated: if not self._is_collective: self._ps_env() - if "PADDLE_WITH_GLOO" in os.environ: - self._init_gloo = bool(os.environ["PADDLE_WITH_GLOO"]) - if self._init_gloo: - self._init_gloo_env() else: self._collective_env() self._role_is_generated = True - - def __get_default_iface(self): - """ - get default physical interface - """ - default1 = self.__get_default_iface_from_gateway() - default2 = self.__get_default_iface_from_interfaces() - return default2 if default1 == "lo" else default1 - - def __get_default_iface_from_gateway(self): - """ - get default physical interface - """ - import netifaces - gateways = netifaces.gateways() - if gateways.get(netifaces.AF_INET) != None: - gateway = gateways[netifaces.AF_INET] - if len(gateway) > 0 and len(gateway[0]) > 1: - return gateway[0][1] - return "lo" - - def __get_default_iface_from_interfaces(self): - """ - get default physical interface - """ - import netifaces - for intf_name in netifaces.interfaces(): - addresses = netifaces.ifaddresses(intf_name) - if netifaces.AF_INET in addresses: - ipv4_addresses = addresses[netifaces.AF_INET] - for ipv4_address in ipv4_addresses: - 
if 'broadcast' in ipv4_address: - return intf_name - return "lo" - - def __start_kv_server(self, http_server_d, size_d): - from paddle.distributed.fleet.utils.http_server import KVServer - http_server = KVServer(int(self._http_ip_port[1]), size_d) - http_server.start() - wait_seconds = 5 - while http_server_d.get("running", - False) and not http_server.shoud_stop(): - time.sleep(wait_seconds) - http_server.stop() + self._gloo_init() class UserDefinedRoleMaker(PaddleCloudRoleMaker): @@ -677,7 +881,7 @@ class UserDefinedRoleMaker(PaddleCloudRoleMaker): self._worker_endpoints = self._kwargs.get("worker_endpoints") self._current_id = self._kwargs.get("current_id") self._trainers_num = len(self._worker_endpoints) - self._training_role = Role.Worker + self._training_role = Role.WORKER self._node_num = len( set([x.split(':')[0] for x in self._worker_endpoints])) @@ -688,8 +892,6 @@ class UserDefinedRoleMaker(PaddleCloudRoleMaker): if not self._role_is_generated: if not self._is_collective: self._user_defined_ps_env() - if self._init_gloo: - self._init_gloo_env() else: self._user_defined_collective_env() self._role_is_generated = True diff --git a/python/paddle/distributed/fleet/base/util_factory.py b/python/paddle/distributed/fleet/base/util_factory.py index 4fa247c3196..e822c3c92f4 100644 --- a/python/paddle/distributed/fleet/base/util_factory.py +++ b/python/paddle/distributed/fleet/base/util_factory.py @@ -57,34 +57,7 @@ class UtilBase(object): ), "fs_client must be the instance of paddle.distributed.fleet.utils.FS" self.fs_client = fs_client - def __check_comm_world(self, comm_world="worker"): - if not self.role_maker._role_is_generated: - self.role_maker.generate_role() - - _comm_world = None - comm_world_upper = comm_world.upper() - if comm_world_upper == "WORKER": - if not self.role_maker.is_worker(): - print( - "warning: current role is not worker in collective_func(comm_world=\"worker\")" - ) - _comm_world = self.role_maker._node_type_comm - elif comm_world_upper 
== "SERVER": - if not self.role_maker.is_server(): - print( - "warning: current role is not server in collective_func(comm_world=\"server\")" - ) - _comm_world = self.role_maker._node_type_comm - elif comm_world_upper == "ALL": - _comm_world = self.role_maker._all_comm - else: - raise ValueError( - "not support comm_world, please choose one from [worker, server, all]" - ) - - return _comm_world - - def all_reduce(self, input, mode, comm_world="worker"): + def all_reduce(self, input, mode="sum", comm_world="worker"): """ All reduce `input` between specified collection. This is a distributed API. @@ -130,8 +103,7 @@ class UtilBase(object): if __name__ == "__main__": train() """ - _comm_world = self.__check_comm_world(comm_world) - return self.role_maker._all_reduce(_comm_world, input, mode) + return self.role_maker._all_reduce(input, mode, comm_world) def barrier(self, comm_world="worker"): """ @@ -170,8 +142,7 @@ class UtilBase(object): if __name__ == "__main__": train() """ - _comm_world = self.__check_comm_world(comm_world) - self.role_maker._barrier(_comm_world) + self.role_maker._barrier(comm_world) def all_gather(self, input, comm_world="worker"): """ @@ -219,8 +190,8 @@ class UtilBase(object): if __name__ == "__main__": train() """ - _comm_world = self.__check_comm_world(comm_world) - return self.role_maker._all_gather(_comm_world, input) + + return self.role_maker._all_gather(input, comm_world) def _broadcast(self): pass diff --git a/python/paddle/distributed/fleet/launch.py b/python/paddle/distributed/fleet/launch.py index a527393f602..4b629bc35ce 100644 --- a/python/paddle/distributed/fleet/launch.py +++ b/python/paddle/distributed/fleet/launch.py @@ -55,7 +55,10 @@ launch a process on each of the given gpu card or cpu machine. 
""" from __future__ import print_function + +import shutil import sys +import tempfile from sys import version import subprocess import os @@ -213,12 +216,20 @@ def launch_collective(args): cluster, pod = get_cluster_from_args(args, gpus) logger.debug("get cluster from args:{}".format(cluster)) + global_envs = copy.copy(os.environ.copy()) + gloo_rendezvous_dir = tempfile.mkdtemp() + # add gloo env + global_envs["PADDLE_WITH_GLOO"] = "1" + global_envs["PADDLE_GLOO_RENDEZVOUS"] = "2" + global_envs["PADDLE_GLOO_FS_PATH"] = gloo_rendezvous_dir + procs = start_local_trainers( cluster, pod, training_script=args.training_script, training_script_args=args.training_script_args, - log_dir=args.log_dir) + log_dir=args.log_dir, + envs=global_envs) while True: alive = watch_local_trainers(procs, cluster.trainers_nranks()) @@ -230,6 +241,9 @@ def launch_collective(args): time.sleep(3) + if os.path.exists(gloo_rendezvous_dir): + shutil.rmtree(gloo_rendezvous_dir) + def launch_ps(args): ports = None @@ -315,6 +329,13 @@ def launch_ps(args): default_env = os.environ.copy() current_env = copy.copy(default_env) + + gloo_rendezvous_dir = tempfile.mkdtemp() + # add gloo env + current_env["PADDLE_WITH_GLOO"] = "1" + current_env["PADDLE_GLOO_RENDEZVOUS"] = "2" + current_env["PADDLE_GLOO_FS_PATH"] = gloo_rendezvous_dir + current_env.pop("http_proxy", None) current_env.pop("https_proxy", None) procs = [] @@ -419,6 +440,9 @@ def launch_ps(args): procs[i].proc.terminate() print("all parameter server are killed", file=sys.stderr) + if os.path.exists(gloo_rendezvous_dir): + shutil.rmtree(gloo_rendezvous_dir) + def launch(): args = _parse_args() diff --git a/python/paddle/distributed/fleet/launch_utils.py b/python/paddle/distributed/fleet/launch_utils.py index b6f4c75a276..17d3b96cf44 100644 --- a/python/paddle/distributed/fleet/launch_utils.py +++ b/python/paddle/distributed/fleet/launch_utils.py @@ -398,8 +398,14 @@ def start_local_trainers(cluster, pod, training_script, training_script_args, 
- log_dir=None): - current_env = copy.copy(os.environ.copy()) + log_dir=None, + envs=None): + + if envs is None: + current_env = copy.copy(os.environ.copy()) + else: + current_env = copy.copy(envs) + #paddle broadcast ncclUniqueId use socket, and #proxy maybe make trainers unreachable, so delete them. #if we set them to "", grpc will log error message "bad uri" diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base.py b/python/paddle/fluid/tests/unittests/test_fleet_base.py index 4ced9841ee4..3a90b363f27 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_base.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_base.py @@ -27,7 +27,7 @@ class TestFleetBase(unittest.TestCase): os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001" os.environ["PADDLE_TRAINERS_NUM"] = "2" os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = \ - "127.0.0.1:36001,127.0.0.2:36001" + "127.0.0.1:36001,127.0.0.2:36001" def test_init(self): role = role_maker.PaddleCloudRoleMaker(is_collective=True) @@ -88,7 +88,7 @@ class TestFleetBase(unittest.TestCase): def test_util(self): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - self.assertEqual(fleet.util, None) + self.assertEqual(fleet.util(), None) def test_barrier_worker(self): role = role_maker.PaddleCloudRoleMaker(is_collective=True) @@ -99,20 +99,17 @@ class TestFleetBase(unittest.TestCase): def test_init_worker(self): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - if fleet.is_worker(): - fleet.init_worker() - def test_run_server(self): - role = role_maker.PaddleCloudRoleMaker(is_collective=True) - fleet.init(role) - if fleet.is_worker(): - fleet.run_worker() + with self.assertRaises(ValueError): + if fleet.is_worker(): + fleet.init_worker() def test_stop_worker(self): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - if fleet.is_worker(): - fleet.stop_worker() + with self.assertRaises(ValueError): + if fleet.is_worker(): + 
fleet.stop_worker() def test_distributed_optimizer(self): role = role_maker.PaddleCloudRoleMaker(is_collective=True) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py index cf9b3e1e9a1..d786fa1eba8 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py @@ -15,7 +15,11 @@ from __future__ import print_function import os +import platform +import shutil +import tempfile import unittest +import paddle import paddle.distributed.fleet.base.role_maker as role_maker @@ -42,9 +46,9 @@ class TestRoleMakerBase(unittest.TestCase): self.assertTrue(len(pserver_endpoints) == 0) print(role.to_string()) - self.assertTrue(role._all_gather(role._node_type_comm, 1) is None) - self.assertTrue(role._all_reduce(role._node_type_comm, 1) is None) - role._barrier(role._node_type_comm) + self.assertTrue(role._all_gather(1, "worker") is None) + self.assertTrue(role._all_reduce(1, "sum", "worker") is None) + role._barrier("worker") class TestCloudRoleMaker(unittest.TestCase): @@ -72,8 +76,8 @@ class TestCloudRoleMaker(unittest.TestCase): print("warning: no netifaces, skip test_tr_rolemaker") return - ro = role_maker.PaddleCloudRoleMaker( - is_collective=False, init_gloo=False) + ro = role_maker.PaddleCloudRoleMaker(is_collective=False) + self.assertTrue(ro.is_worker()) self.assertFalse(ro.is_server()) self.assertEqual(ro.worker_num(), 2) @@ -108,8 +112,9 @@ class TestCloudRoleMaker(unittest.TestCase): self.assertEqual(ro.server_num(), 2) pserver_endpoints = ro.get_pserver_endpoints() self.assertEqual(pserver_endpoints[0], '127.0.0.1:36001') - self.assertTrue(ro._all_gather(ro._all_comm, 1) is None) - self.assertTrue(ro._all_reduce(ro._all_comm, 1) is None) + + self.assertEqual(ro._all_gather(1, "worker"), 1) + self.assertEqual(ro._all_reduce(1, "sum", "worker"), 1) def test_traing_role(self): """Test training 
role.""" @@ -142,7 +147,7 @@ class TestUserDefinedRoleMaker(unittest.TestCase): ro = role_maker.UserDefinedRoleMaker( is_collective=False, init_gloo=False, - server_endpoints="127.0.0.1:36001,127.0.0.1:36001", + server_endpoints=["127.0.0.1:36001", "127.0.0.1:36001"], role=role_maker.Role.SERVER, current_id=0, worker_num=2) @@ -161,14 +166,274 @@ class TestUserDefinedRoleMaker(unittest.TestCase): ro = role_maker.UserDefinedRoleMaker( is_collective=False, init_gloo=False, - server_endpoints="127.0.0.1:36001,127.0.0.1:36001", + server_endpoints=["127.0.0.1:36001", "127.0.0.1:36001"], role=role_maker.Role.WORKER, current_id=0, worker_num=2) + self.assertIn("127.0.0.1:36001", ro.get_pserver_endpoints()) self.assertTrue(ro.is_worker()) self.assertEqual(ro.role_id(), 0) +class TestGlooWithCloudRoleMaker(unittest.TestCase): + def setUp(self): + os.environ["PADDLE_TRAINERS_NUM"] = "1" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_TRAINER_ID"] = "0" + + def case(self, role, comm_world): + role._barrier(comm_world) + + gather = role._all_gather(1, comm_world) + self.assertEqual(gather[0], 1) + + all_reduce = role._all_reduce(1, "sum", comm_world) + self.assertEqual(1, all_reduce) + + def mkdir(self): + tmp = tempfile.mkdtemp() + return tmp + + def clean(self, tmp): + shutil.rmtree(tmp) + + def test_hdfs_gloo(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + tmp = self.mkdir() + os.environ["TRAINING_ROLE"] = "TRAINER" + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "1" + os.environ["PADDLE_GLOO_FS_NAME"] = "NULL" + os.environ["PADDLE_GLOO_FS_UGI"] = "NULL" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + role = role_maker.PaddleCloudRoleMaker() + role.generate_role() + self.case(role, "worker") + 
self.clean(tmp) + + def test_fs_gloo(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + tmp = self.mkdir() + os.environ["TRAINING_ROLE"] = "TRAINER" + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "2" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + role = role_maker.PaddleCloudRoleMaker() + role.generate_role() + self.case(role, "worker") + self.clean(tmp) + + def test_fs_gloo2(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + tmp = self.mkdir() + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "2" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + role = role_maker.PaddleCloudRoleMaker() + role.generate_role() + self.case(role, "server") + self.clean(tmp) + + def test_fs_gloo3(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + tmp = self.mkdir() + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "1" + os.environ["PADDLE_GLOO_FS_NAME"] = "NULL" + os.environ["PADDLE_GLOO_FS_UGI"] = "NULL" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + role = role_maker.PaddleCloudRoleMaker() + role.generate_role() + self.case(role, "server") + self.clean(tmp) + + def test_fs_gloo4(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + 
os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "3" + os.environ["PADDLE_GLOO_HTTP_HOST"] = "127.0.0.1" + os.environ["PADDLE_GLOO_HTTP_PORT"] = "30019" + + role = role_maker.PaddleCloudRoleMaker() + role.generate_role() + import time + time.sleep(3) + + def test_fs_gloo5(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + tmp = self.mkdir() + + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + os.environ["PADDLE_TRAINERS_NUM"] = "0" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "2" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "2" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + role = role_maker.PaddleCloudRoleMaker() + role.generate_role() + self.case(role, "server") + self.case(role, "all") + self.clean(tmp) + + def test_fs_gloo6(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + tmp = self.mkdir() + + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + os.environ["PADDLE_TRAINERS_NUM"] = "0" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + + os.environ["PADDLE_WITH_GLOO"] = "2" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "1" + os.environ["PADDLE_GLOO_FS_NAME"] = "NULL" + os.environ["PADDLE_GLOO_FS_UGI"] = "NULL" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + role = role_maker.PaddleCloudRoleMaker() + role.generate_role() + self.case(role, "server") + self.case(role, "all") + self.clean(tmp) + + 
def test_fs_gloo7(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + os.environ["PADDLE_TRAINERS_NUM"] = "0" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "5" + + role = role_maker.PaddleCloudRoleMaker() + self.assertRaises(ValueError, role.generate_role) + + def test_fs_gloo8(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + tmp = self.mkdir() + + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + os.environ["PADDLE_TRAINERS_NUM"] = "0" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + + os.environ["PADDLE_WITH_GLOO"] = "2" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "1" + os.environ["PADDLE_GLOO_FS_NAME"] = "NULL" + os.environ["PADDLE_GLOO_FS_UGI"] = "NULL" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + def net(): + x = paddle.fluid.layers.data(name='x', shape=[13], dtype='float32') + y_predict = paddle.fluid.layers.fc(input=x, size=1, act=None) + y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') + cost = paddle.fluid.layers.square_error_cost( + input=y_predict, label=y) + avg_cost = paddle.fluid.layers.mean(cost) + return avg_cost + + from paddle.distributed import fleet + + role = role_maker.PaddleCloudRoleMaker() + fleet.init(role) + avg_cost = net() + + strategy = paddle.distributed.fleet.DistributedStrategy() + strategy.a_sync = False + + optimizer = paddle.optimizer.SGD(0.01) + optimizer = fleet.distributed_optimizer(optimizer, strategy) + optimizer.minimize(avg_cost) + + comm_world = "server" + 
fleet.util().barrier(comm_world) + + gather = fleet.util().all_gather(1, comm_world) + self.assertEqual(gather[0], 1) + + all_reduce = fleet.util().all_reduce(1, "sum", comm_world) + self.assertEqual(1, all_reduce) + + self.clean(tmp) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_util.py b/python/paddle/fluid/tests/unittests/test_fleet_util.py index d506088fde0..1570912e740 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_util.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_util.py @@ -59,7 +59,7 @@ class TestFleetUtil(unittest.TestCase): import paddle.distributed.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - default_util = fleet.util + default_util = fleet.util() self.assertEqual(default_util, None) def test_set_user_defined_util(self): @@ -76,8 +76,8 @@ class TestFleetUtil(unittest.TestCase): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) my_util = UserDefinedUtil() - fleet.util = my_util - user_id = fleet.util.get_user_id() + fleet.set_util(my_util) + user_id = fleet.util().get_user_id() self.assertEqual(user_id, 10) def test_fs(self): @@ -88,97 +88,6 @@ class TestFleetUtil(unittest.TestCase): self.assertFalse(fs.need_upload_download()) fleet_util._set_file_system(fs) - def test_barrier(self): - try: - import netifaces - except: - print("warning: no netifaces, skip test_barrier") - return - - gloo = fluid.core.Gloo() - gloo.set_rank(0) - gloo.set_size(1) - gloo.set_prefix("123") - gloo.set_iface("lo") - gloo.set_hdfs_store("./tmp_test_fleet_barrier", "", "") - gloo.init() - - role = role_maker.UserDefinedRoleMaker( - is_collective=False, - init_gloo=False, - current_id=0, - role=role_maker.Role.SERVER, - worker_endpoints=["127.0.0.1:6003"], - server_endpoints=["127.0.0.1:6001"]) - role._node_type_comm = gloo - role._role_is_generated = True - fleet_util._set_role_maker(role) - - 
fleet_util.barrier("worker") - - def test_all_reduce(self): - try: - import netifaces - except: - print("warning: no netifaces, skip test_all_reduce") - return - - gloo = fluid.core.Gloo() - gloo.set_rank(0) - gloo.set_size(1) - gloo.set_prefix("123") - gloo.set_iface("lo") - gloo.set_hdfs_store("./tmp_test_fleet_reduce", "", "") - gloo.init() - - role = role_maker.UserDefinedRoleMaker( - is_collective=False, - init_gloo=False, - current_id=0, - role=role_maker.Role.WORKER, - worker_endpoints=["127.0.0.1:6003"], - server_endpoints=["127.0.0.1:6001"]) - role._node_type_comm = gloo - role._role_is_generated = True - fleet_util._set_role_maker(role) - - output = fleet_util.all_reduce(1, "sum", comm_world="server") - print(output) - - # self.assertEqual(output, 1) - - def test_all_gather(self): - try: - import netifaces - except: - print("warning: no netifaces, skip test_all_gather") - return - - gloo = fluid.core.Gloo() - gloo.set_rank(0) - gloo.set_size(1) - gloo.set_prefix("123") - gloo.set_iface("lo") - gloo.set_hdfs_store("./tmp_test_fleet_reduce", "", "") - gloo.init() - - role = role_maker.UserDefinedRoleMaker( - is_collective=False, - init_gloo=False, - current_id=0, - role=role_maker.Role.SERVER, - worker_endpoints=["127.0.0.1:6003"], - server_endpoints=["127.0.0.1:6001"]) - role._node_type_comm = gloo - role._all_comm = gloo - role._role_is_generated = True - fleet_util._set_role_maker(role) - - output = fleet_util.all_gather(1, comm_world="all") - print(output) - # self.assertTrue(len(output) == 1 and output[0] == 1) - self.assertRaises(Exception, fleet_util.all_gather, 1, "test") - def download_files(self): path = download(self.proto_data_url, self.module_name, self.proto_data_md5) -- GitLab From d6b54de46753827c23cabe5f3307f7493db194d0 Mon Sep 17 00:00:00 2001 From: tangwei12 Date: Sun, 20 Sep 2020 13:18:26 +0800 Subject: [PATCH 010/117] =?UTF-8?q?=E3=80=90paddle.fleet=E3=80=91Fix/role?= =?UTF-8?q?=20maker=20api=20fix=20(#27326)?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix fleet util and gloo * fix worker endpoints * fix * fix UT * fix gloo * fix gloo * update gloo * update gloo * update gloo * update gloo * update gloo * fix gloo wrapper for hdfs * add file gloo and UT * fix UT * fix UT * fix UT * hide public method of RoleMaker * fix UT * GPU fleetrun support gloo * parameterserver fleetrun support gloo * add UT * add UT * fix UT * fix get server endpoint * fix get server endpoint * fix UT * hide public method of rolemaker * hide public method of rolemaker * hide public method of rolemaker * Update test_fleet_rolemaker_new.py * hide public method of rolemaker * hide public method of rolemaker --- .../distributed/fleet/base/fleet_base.py | 24 +- .../distributed/fleet/base/role_maker.py | 112 +++-- .../distributed/fleet/base/util_factory.py | 6 +- .../fleet/meta_optimizers/common.py | 6 +- .../fleet/meta_optimizers/dgc_optimizer.py | 4 +- .../graph_execution_optimizer.py | 18 +- .../meta_optimizers/localsgd_optimizer.py | 10 +- .../parameter_server_graph_optimizer.py | 2 +- .../parameter_server_optimizer.py | 4 +- .../meta_optimizers/pipeline_optimizer.py | 8 +- .../fleet/runtime/parameter_server_runtime.py | 21 +- .../fleet/parameter_server/ir/public.py | 30 +- .../fluid/tests/unittests/test_fleet_base.py | 49 ++- .../tests/unittests/test_fleet_rolemaker_2.py | 2 +- .../unittests/test_fleet_rolemaker_new.py | 414 ++++++++++++++++-- 15 files changed, 531 insertions(+), 179 deletions(-) diff --git a/python/paddle/distributed/fleet/base/fleet_base.py b/python/paddle/distributed/fleet/base/fleet_base.py index aeb8cac98e2..d00faac8385 100644 --- a/python/paddle/distributed/fleet/base/fleet_base.py +++ b/python/paddle/distributed/fleet/base/fleet_base.py @@ -180,7 +180,7 @@ class Fleet(object): raise ValueError( "`role_maker` should be subclass of `RoleMakerBase`, but got {}". 
format(type(role_maker))) - self._role_maker.generate_role() + self._role_maker._generate_role() self.strategy_compiler = StrategyCompiler() if paddle.fluid.framework.in_dygraph_mode(): @@ -207,7 +207,7 @@ class Fleet(object): fleet.is_first_worker() """ - return self._role_maker.is_first_worker() + return self._role_maker._is_first_worker() def worker_index(self): """ @@ -224,7 +224,7 @@ class Fleet(object): fleet.worker_index() """ - return self._role_maker.worker_index() + return self._role_maker._worker_index() def worker_num(self): """ @@ -241,7 +241,7 @@ class Fleet(object): fleet.worker_num() """ - return self._role_maker.worker_num() + return self._role_maker._worker_num() def is_worker(self): """ @@ -259,7 +259,7 @@ class Fleet(object): fleet.is_worker() """ - return self._role_maker.is_worker() + return self._role_maker._is_worker() def worker_endpoints(self, to_string=False): """ @@ -277,9 +277,9 @@ class Fleet(object): """ if to_string: - return ",".join(self._role_maker.get_trainer_endpoints()) + return ",".join(self._role_maker._get_trainer_endpoints()) else: - return self._role_maker.get_trainer_endpoints() + return self._role_maker._get_trainer_endpoints() def server_num(self): """ @@ -294,7 +294,7 @@ class Fleet(object): fleet.init() fleet.server_num() """ - return len(self._role_maker.get_pserver_endpoints()) + return len(self._role_maker._get_pserver_endpoints()) def server_index(self): """ @@ -311,7 +311,7 @@ class Fleet(object): fleet.server_index() """ - return self._role_maker.server_index() + return self._role_maker._server_index() def server_endpoints(self, to_string=False): """ @@ -330,9 +330,9 @@ class Fleet(object): """ if to_string: - return ",".join(self._role_maker.get_pserver_endpoints()) + return ",".join(self._role_maker._get_pserver_endpoints()) else: - return self._role_maker.get_pserver_endpoints() + return self._role_maker._get_pserver_endpoints() def is_server(self): """ @@ -350,7 +350,7 @@ class Fleet(object): 
fleet.is_server() """ - return self._role_maker.is_server( + return self._role_maker._is_server( ) or self._role_maker._is_heter_worker() def set_util(self, util): diff --git a/python/paddle/distributed/fleet/base/role_maker.py b/python/paddle/distributed/fleet/base/role_maker.py index d36c06047f5..81d5908ccd4 100644 --- a/python/paddle/distributed/fleet/base/role_maker.py +++ b/python/paddle/distributed/fleet/base/role_maker.py @@ -361,19 +361,19 @@ class RoleMakerBase(object): self._heter_trainer_device = "CPU" self._is_heter_parameter_server_mode = False - def is_worker(self): + def _is_worker(self): """ return is_worker() of current process """ raise NotImplementedError("Please implement this method in child class") - def is_server(self): + def _is_server(self): """ return is_server() of current process """ raise NotImplementedError("Please implement this method in child class") - def is_first_worker(self): + def _is_first_worker(self): """ Check whether the node is the first instance of worker. Returns: @@ -382,7 +382,7 @@ class RoleMakerBase(object): """ raise NotImplementedError("Please implement this method in child class") - def worker_num(self): + def _worker_num(self): """ Get current total worker number. @@ -391,7 +391,7 @@ class RoleMakerBase(object): """ raise NotImplementedError("Please implement this method in child class") - def server_num(self): + def _server_num(self): """ Get current total server number. @@ -400,7 +400,7 @@ class RoleMakerBase(object): """ raise NotImplementedError("Please implement this method in child class") - def worker_index(self): + def _worker_index(self): """ Get current worker id. @@ -409,7 +409,7 @@ class RoleMakerBase(object): """ raise NotImplementedError("Please implement this method in child class") - def server_index(self): + def _server_index(self): """ Get current server id. 
@@ -418,7 +418,7 @@ class RoleMakerBase(object): """ raise NotImplementedError("Please implement this method in child class") - def role_id(self): + def _role_id(self): """ Get current id. @@ -427,7 +427,7 @@ class RoleMakerBase(object): """ raise NotImplementedError("Please implement this method in child class") - def node_num(self): + def _node_num(self): """ Get the training node number Returns: @@ -435,13 +435,13 @@ class RoleMakerBase(object): """ raise NotImplementedError("Please implement this method in child class") - def get_trainer_endpoints(self): + def _get_trainer_endpoints(self): """ return trainer endpoints """ return self._worker_endpoints - def get_pserver_endpoints(self): + def _get_pserver_endpoints(self): """ return pserver endpoints """ @@ -543,90 +543,92 @@ class PaddleCloudRoleMaker(RoleMakerBase): def _all_reduce(self, input, mode="sum", comm_world="worker"): return self._gloo.all_reduce(input, mode, comm_world) - def is_worker(self): + def _is_worker(self): """ whether current process is worker """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._role == Role.WORKER - def is_server(self): + def _is_server(self): """ whether current process is server """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._role == Role.SERVER - def is_first_worker(self): + def _is_first_worker(self): """ whether current process is worker of rank 0 """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._role == Role.WORKER and self._current_id == 0 - def worker_index(self): + def _worker_index(self): """ get index of current worker """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._current_id - def server_index(self): + def _server_index(self): """ get index of current server """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._current_id - def 
role_id(self): + def _role_id(self): """ get index of current node """ + if not self._role_is_generated: + self._generate_role() return self._current_id - def worker_num(self): + def _worker_num(self): """ retrun the current number of worker """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._trainers_num - def server_num(self): + def _server_num(self): """ return the current number of server """ if not self._role_is_generated: - self.generate_role() - return len(self.get_pserver_endpoints()) + self._generate_role() + return len(self._get_pserver_endpoints()) - def node_num(self): + def _node_num(self): """ return the training node number """ if not self._role_is_generated: - self.generate_role() - return self._node_num + self._generate_role() + return self._nodes_num - def get_trainer_endpoints(self): + def _get_trainer_endpoints(self): """ get endpoint of all trainers """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._worker_endpoints - def get_pserver_endpoints(self): + def _get_pserver_endpoints(self): """ get endpoint of all pservers """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._server_endpoints def _is_non_distributed(self): @@ -635,7 +637,7 @@ class PaddleCloudRoleMaker(RoleMakerBase): (use python-run to launch fleet-code directly) """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._non_distributed def _heter_worker_num(self): @@ -643,7 +645,7 @@ class PaddleCloudRoleMaker(RoleMakerBase): get heter worker nums """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._heter_trainers_num def _is_heter_worker(self): @@ -651,25 +653,9 @@ class PaddleCloudRoleMaker(RoleMakerBase): whether current process is heter worker """ if not self._role_is_generated: - self.generate_role() + self._generate_role() return self._role == Role.HETER_WORKER - def 
_get_rank(self): - """ - get current rank in all workers and pservers - """ - if not self._role_is_generated: - self.generate_role() - return self._rank - - def _get_size(self): - """ - get total num of all workers and pservers - """ - if not self._role_is_generated: - self.generate_role() - return self._size - def _ps_env(self): try: # Environment variable PADDLE_PSERVERS_IP_PORT_LIST must be set @@ -682,7 +668,7 @@ class PaddleCloudRoleMaker(RoleMakerBase): self._trainers_num = 1 self._role = Role.WORKER self._current_id = 0 - self._node_num = 1 + self._nodes_num = 1 self._heter_trainers_num = 0 self._heter_trainer_endpoints = None self._non_distributed = True @@ -757,7 +743,7 @@ class PaddleCloudRoleMaker(RoleMakerBase): self._trainers_num = trainers_num self._role = role self._current_id = current_id - self._node_num = len( + self._nodes_num = len( set([x.split(':')[0] for x in self._worker_endpoints])) self._heter_trainers_num = heter_trainers_num self._heter_trainer_endpoints = heter_trainer_eplist @@ -776,7 +762,7 @@ class PaddleCloudRoleMaker(RoleMakerBase): self._non_distributed = True self._worker_endpoints = self._worker_endpoints.split(",") self._trainers_num = len(self._worker_endpoints) - self._node_num = len( + self._nodes_num = len( set([x.split(':')[0] for x in self._worker_endpoints])) def _gloo_init(self): @@ -832,13 +818,13 @@ class PaddleCloudRoleMaker(RoleMakerBase): self._gloo.init( rendezvous=rendezvous_type, role=self._role, - role_id=self.role_id(), - worker_num=self.worker_num(), - server_num=self.server_num(), + role_id=self._role_id(), + worker_num=self._worker_num(), + server_num=self._server_num(), need_init_all=need_init_all, kwargs=kwargs) - def generate_role(self): + def _generate_role(self): """ generate role for role maker """ @@ -874,7 +860,7 @@ class UserDefinedRoleMaker(PaddleCloudRoleMaker): self._cur_endpoint = self._worker_endpoints[self._current_id] elif self._role == Role.SERVER: self._cur_endpoint = 
self._server_endpoints[self._current_id] - self._node_num = len( + self._nodes_num = len( set([x.split(':')[0] for x in self._worker_endpoints])) def _user_defined_collective_env(self): @@ -882,10 +868,10 @@ class UserDefinedRoleMaker(PaddleCloudRoleMaker): self._current_id = self._kwargs.get("current_id") self._trainers_num = len(self._worker_endpoints) self._training_role = Role.WORKER - self._node_num = len( + self._nodes_num = len( set([x.split(':')[0] for x in self._worker_endpoints])) - def generate_role(self): + def _generate_role(self): """ generate role for role maker """ diff --git a/python/paddle/distributed/fleet/base/util_factory.py b/python/paddle/distributed/fleet/base/util_factory.py index e822c3c92f4..efaa854c087 100644 --- a/python/paddle/distributed/fleet/base/util_factory.py +++ b/python/paddle/distributed/fleet/base/util_factory.py @@ -237,8 +237,8 @@ class UtilBase(object): if not isinstance(files, list): raise TypeError("files should be a list of file need to be read.") - trainer_id = self.role_maker.worker_index() - trainers = self.role_maker.worker_num() + trainer_id = self.role_maker._worker_index() + trainers = self.role_maker._worker_num() remainder = len(files) % trainers blocksize = int(len(files) / trainers) @@ -280,7 +280,7 @@ class UtilBase(object): fleet_util._set_role_maker(role) fleet_util.print_on_rank("I'm worker 0", 0) """ - if self.role_maker.worker_index() != rank_id: + if self.role_maker._worker_index() != rank_id: return print(message) diff --git a/python/paddle/distributed/fleet/meta_optimizers/common.py b/python/paddle/distributed/fleet/meta_optimizers/common.py index 70b010978bb..8ff4114bf8e 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/common.py +++ b/python/paddle/distributed/fleet/meta_optimizers/common.py @@ -57,12 +57,12 @@ class CollectiveHelper(object): if startup_program is None: self.startup_program = fluid.default_startup_program() - endpoints = self.role_maker.get_trainer_endpoints() - 
current_endpoint = endpoints[self.role_maker.worker_index()] + endpoints = self.role_maker._get_trainer_endpoints() + current_endpoint = endpoints[self.role_maker._worker_index()] for ring_id in range(self.nrings): self._init_communicator( self.startup_program, current_endpoint, endpoints, - self.role_maker.worker_index(), ring_id, self.wait_port) + self.role_maker._worker_index(), ring_id, self.wait_port) self._broadcast_params() def _init_communicator(self, program, current_endpoint, endpoints, rank, diff --git a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py index 3f6ed1ed2f2..6806a479d30 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py @@ -47,7 +47,7 @@ class DGCOptimizer(MetaOptimizerBase): sparsity=configs['sparsity'], parameter_list=opt._parameter_list, use_nesterov=opt._use_nesterov, - num_trainers=self.role_maker.worker_num(), + num_trainers=self.role_maker._worker_num(), regularization=opt.regularization, grad_clip=opt._grad_clip, name=opt._name) @@ -60,7 +60,7 @@ class DGCOptimizer(MetaOptimizerBase): if not isinstance(self.inner_opt, Momentum): logging.warn("dgc only works on Momentum optimizer") return False - if self.role_maker.worker_num() <= 1: + if self.role_maker._worker_num() <= 1: logging.warn("dgc only works on multi cards") return False diff --git a/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py index 6c1cc3d7a97..0ad9e5680ea 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py @@ -50,12 +50,12 @@ class GraphExecutionOptimizer(MetaOptimizerBase): # should fix the variable def _setup_nccl_op(self, startup_program, main_program, 
build_strategy): - trainer_endpoints = self.role_maker.get_trainer_endpoints() + trainer_endpoints = self.role_maker._get_trainer_endpoints() trainers = trainer_endpoints - trainer_id = self.role_maker.worker_index() - current_endpoint = self.role_maker.get_trainer_endpoints()[trainer_id] + trainer_id = self.role_maker._worker_index() + current_endpoint = self.role_maker._get_trainer_endpoints()[trainer_id] trainer_endpoints_env = ",".join(trainer_endpoints) - trainers_num = self.role_maker.worker_num() + trainers_num = self.role_maker._worker_num() nccl_id_var = startup_program.global_block().create_var( name="NCCLID", persistable=True, type=core.VarDesc.VarType.RAW) for i in range(1, build_strategy.nccl_comm_num): @@ -127,8 +127,8 @@ class GraphExecutionOptimizer(MetaOptimizerBase): local_build_strategy.enable_sequential_execution = True exe_strategy = self.user_defined_strategy.execution_strategy - worker_num = self.role_maker.worker_num() - node_num = self.role_maker.node_num() + worker_num = self.role_maker._worker_num() + node_num = self.role_maker._node_num() if self.role_maker._is_collective: assert worker_num >= 1, "nccl2 worker_num must >= 1, now:{}" % worker_num @@ -170,9 +170,9 @@ class GraphExecutionOptimizer(MetaOptimizerBase): # TODO(guru4elephant): should be an independent optimizer self._setup_nccl_op(startup_program, main_program, local_build_strategy) - local_build_strategy.num_trainers = self.role_maker.worker_num() - local_build_strategy.trainer_id = self.role_maker.worker_index() - local_build_strategy.trainers_endpoints = self.role_maker.get_trainer_endpoints( + local_build_strategy.num_trainers = self.role_maker._worker_num() + local_build_strategy.trainer_id = self.role_maker._worker_index() + local_build_strategy.trainers_endpoints = self.role_maker._get_trainer_endpoints( ) local_build_strategy.enable_backward_optimizer_op_deps = True diff --git a/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py 
b/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py index 4ebac20888d..9f094978d84 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py @@ -38,7 +38,7 @@ class LocalSGDOptimizer(MetaOptimizerBase): if not self.user_defined_strategy.localsgd: return False - if self.role_maker.worker_num() <= 1: + if self.role_maker._worker_num() <= 1: return False return isinstance(self.inner_opt, paddle.optimizer.momentum.Momentum) \ @@ -168,7 +168,7 @@ class LocalSGDOptimizer(MetaOptimizerBase): inputs={'X': [param]}, outputs={'Out': [param]}, attrs={ - 'scale': 1.0 / self.role_maker.worker_num(), + 'scale': 1.0 / self.role_maker._worker_num(), OP_ROLE_KEY: OpRole.Optimize }) sub_block.append_op( @@ -208,7 +208,7 @@ class AdaptiveLocalSGDOptimizer(MetaOptimizerBase): if not self.user_defined_strategy.adaptive_localsgd: return False - if self.role_maker.worker_num() <= 1: + if self.role_maker._worker_num() <= 1: return False return isinstance(self.inner_opt, paddle.optimizer.momentum.Momentum) \ @@ -275,7 +275,7 @@ class AdaptiveLocalSGDOptimizer(MetaOptimizerBase): inputs={'X': [avg_loss]}, outputs={'Out': [avg_loss]}, attrs={ - 'scale': 1.0 / self.role_maker.worker_num(), + 'scale': 1.0 / self.role_maker._worker_num(), OP_ROLE_KEY: OpRole.Optimize }) @@ -398,7 +398,7 @@ class AdaptiveLocalSGDOptimizer(MetaOptimizerBase): inputs={'X': [param]}, outputs={'Out': [param]}, attrs={ - 'scale': 1.0 / self.role_maker.worker_num(), + 'scale': 1.0 / self.role_maker._worker_num(), OP_ROLE_KEY: OpRole.Optimize }) sub_block.append_op( diff --git a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py index 7dc532c86ea..dfa765364f3 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py +++ 
b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py @@ -31,7 +31,7 @@ class ParameterServerGraphOptimizer(ParameterServerOptimizer): if k_steps < 0: return False - if self.role_maker.is_server(): + if self.role_maker._is_server(): return False if self.role_maker._is_heter_parameter_server_mode: diff --git a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py index 51d4d343165..38ad41f8836 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py @@ -239,10 +239,10 @@ class ParameterServerOptimizer(MetaOptimizerBase): strategy, self.role_maker) compiled_config.strategy = strategy - if self.role_maker.is_worker() or self.role_maker._is_heter_worker(): + if self.role_maker._is_worker() or self.role_maker._is_heter_worker(): main_program, startup_program = self._build_trainer_programs( compiled_config) - elif self.role_maker.is_server(): + elif self.role_maker._is_server(): main_program, startup_program = self._build_pserver_programs( compiled_config) diff --git a/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py index 87fa7077911..889fec838ed 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py @@ -126,11 +126,11 @@ class PipelineOptimizer(MetaOptimizerBase): optimize_ops, params_grads, prog_list = \ self.wrapped_opt.minimize(loss, startup_program, parameter_list, no_grad_set) - if self.role_maker.worker_num() == 1: + if self.role_maker._worker_num() == 1: return optimize_ops, params_grads - endpoints = self.role_maker.get_trainer_endpoints() - current_endpoint = endpoints[self.role_maker.worker_index()] + endpoints = 
self.role_maker._get_trainer_endpoints() + current_endpoint = endpoints[self.role_maker._worker_index()] self.startup_program = startup_program if startup_program is None: self.startup_program = fluid.default_startup_program() @@ -142,7 +142,7 @@ class PipelineOptimizer(MetaOptimizerBase): self.nranks = nranks self.nrings = len(self.main_program_list) - self.rank = self.role_maker.worker_index() + self.rank = self.role_maker._worker_index() self.endpoints = endpoints self.current_endpoint = current_endpoint diff --git a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py index 227f8f60210..ae5c53b8a37 100644 --- a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py +++ b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py @@ -104,9 +104,9 @@ class ParameterServerRuntime(RuntimeBase): def _init_worker(self): def sync_strategy_envs(): kwargs = {} - kwargs["pserver_endpoints"] = self.role_maker.get_pserver_endpoints( - ) - kwargs["trainer_id"] = self.role_maker.worker_index() + kwargs[ + "pserver_endpoints"] = self.role_maker._get_pserver_endpoints() + kwargs["trainer_id"] = self.role_maker._worker_index() return kwargs def geo_strategy_envs(): @@ -150,7 +150,7 @@ class ParameterServerRuntime(RuntimeBase): return "#".join(init_attrs) kwargs = {} - kwargs["trainers"] = self.role_maker.worker_num() + kwargs["trainers"] = self.role_maker._worker_num() kwargs["sparse_attrs"] = get_sparse_attrs() return kwargs @@ -338,7 +338,7 @@ class ParameterServerRuntime(RuntimeBase): block.append_op( type='recv_save', attrs={ - "trainer_id": self.role_maker.worker_index(), + "trainer_id": self.role_maker._worker_index(), "shape": var.shape, "slice_shapes": [",".join([str(i) for i in var.shape])], @@ -378,14 +378,15 @@ class ParameterServerRuntime(RuntimeBase): block.append_op( type='recv_save', attrs={ - "trainer_id": self.role_maker.worker_index(), + "trainer_id": 
self.role_maker._worker_index(), "shape": var.shape, "slice_shapes": slice_shapes, "slice_varnames": var_ctx.split_varnames(), "remote_varnames": var_ctx.split_varnames(), "is_sparse": True, "endpoints": var_ctx.split_endpoints(), - "pserver_num": len(self.role_maker.get_pserver_endpoints()), + "pserver_num": + len(self.role_maker._get_pserver_endpoints()), "file_path": os.path.join(dirname, var.name) }) @@ -403,7 +404,7 @@ class ParameterServerRuntime(RuntimeBase): block.append_op( type='recv_save', attrs={ - "trainer_id": self.role_maker.worker_index(), + "trainer_id": self.role_maker._worker_index(), "shape": var.shape, "slice_shapes": slice_shapes, "slice_varnames": slice_varnames, @@ -411,7 +412,7 @@ class ParameterServerRuntime(RuntimeBase): "is_sparse": True, "endpoints": var_ctx.split_endpoints(), "pserver_num": - len(self.role_maker.get_pserver_endpoints()), + len(self.role_maker._get_pserver_endpoints()), "file_path": os.path.join(dirname, var.name) }) @@ -422,7 +423,7 @@ class ParameterServerRuntime(RuntimeBase): block.append_op( type='recv_save', attrs={ - "trainer_id": self.role_maker.worker_index(), + "trainer_id": self.role_maker._worker_index(), "shape": var.shape, "slice_shapes": [",".join([str(i) for i in var.shape])], diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py index 216478479a7..e348c67ae04 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py @@ -170,22 +170,40 @@ class CompileTimeStrategy(object): return trainer.mode == DistributedMode.ASYNC def get_role_id(self): - return self.role_maker.role_id() + try: + return self.role_maker._role_id() + except Exception: + return self.role_maker.role_id() def get_trainers(self): - return self.role_maker.worker_num() + try: + return self.role_maker._worker_num() + except Exception: + return 
self.role_maker.worker_num() def get_ps_endpoint(self): - return self.role_maker.get_pserver_endpoints()[self.get_role_id()] + try: + return self.role_maker._get_pserver_endpoints()[self.get_role_id()] + except Exception: + return self.role_maker.get_pserver_endpoints()[self.get_role_id()] def get_ps_endpoints(self): - return self.role_maker.get_pserver_endpoints() + try: + return self.role_maker._get_pserver_endpoints() + except Exception: + return self.role_maker.get_pserver_endpoints() def get_heter_worker_endpoints(self): - return self.role_maker._get_heter_worker_endpoints() + try: + return self.role_maker._get_heter_worker_endpoints() + except Exception: + return self.role_maker.get_heter_worker_endpoints() def get_heter_worker_endpoint(self): - return self.role_maker._get_heter_worker_endpoint() + try: + return self.role_maker._get_heter_worker_endpoint() + except Exception: + return self.role_maker.get_heter_worker_endpoint() def get_origin_programs(self): return self.origin_main_program, self.origin_startup_program diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base.py b/python/paddle/fluid/tests/unittests/test_fleet_base.py index 3a90b363f27..45597e7253c 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_base.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_base.py @@ -24,10 +24,10 @@ import numpy as np class TestFleetBase(unittest.TestCase): def setUp(self): os.environ["POD_IP"] = "127.0.0.1" - os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001" + os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36000" os.environ["PADDLE_TRAINERS_NUM"] = "2" os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = \ - "127.0.0.1:36001,127.0.0.2:36001" + "127.0.0.1:36001,127.0.0.2:36002" def test_init(self): role = role_maker.PaddleCloudRoleMaker(is_collective=True) @@ -58,32 +58,51 @@ class TestFleetBase(unittest.TestCase): def test_worker_endpoints(self): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - 
print(fleet.worker_endpoints(to_string=True)) + self.assertEqual( + "127.0.0.1:36000", fleet.worker_endpoints(to_string=True)) + self.assertEqual(["127.0.0.1:36000"], fleet.worker_endpoints()) def test_server_num(self): - role = role_maker.PaddleCloudRoleMaker(is_collective=True) + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PORT"] = "36001" + os.environ["POD_IP"] = "127.0.0.1" + + role = role_maker.PaddleCloudRoleMaker() fleet.init(role) - if fleet.is_server(): - print("fleet server num: {}".format(fleet.server_num())) + os.environ["PADDLE_TRAINERS_NUM"] = "2" + self.assertEqual(2, fleet.server_num()) def test_server_index(self): - role = role_maker.PaddleCloudRoleMaker(is_collective=True) + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PORT"] = "36001" + os.environ["POD_IP"] = "127.0.0.1" + + role = role_maker.PaddleCloudRoleMaker() fleet.init(role) - if fleet.is_server(): - print("fleet server index: {}".format(fleet.server_index())) + self.assertEqual(0, fleet.server_index()) def test_server_endpoints(self): - role = role_maker.PaddleCloudRoleMaker(is_collective=True) + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PORT"] = "36001" + os.environ["POD_IP"] = "127.0.0.1" + + role = role_maker.PaddleCloudRoleMaker() fleet.init(role) if fleet.is_server(): - print("fleet server index: {}".format( - fleet.server_endpoints(to_string=True))) + self.assertEqual( + "127.0.0.1:36001,127.0.0.2:36002", + fleet.server_endpoints(to_string=True)) + self.assertEqual(["127.0.0.1:36001", "127.0.0.2:36002"], + fleet.server_endpoints()) def test_is_server(self): - role = role_maker.PaddleCloudRoleMaker(is_collective=True) + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PORT"] = "36001" + os.environ["POD_IP"] = "127.0.0.1" + + role = role_maker.PaddleCloudRoleMaker() fleet.init(role) - if fleet.is_server(): - print("test fleet is server") + self.assertTrue(fleet.is_server()) def test_util(self): role = 
role_maker.PaddleCloudRoleMaker(is_collective=True) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py index a831f6e838e..dae79071616 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_2.py @@ -87,7 +87,7 @@ class TestCloudRoleMaker2(unittest.TestCase): role2._all_gather(1) role2._all_gather(1) role2._barrier_server() - role2.all_gather(1) + role2._all_gather(1) role3 = GeneralRoleMaker(path="./test_gloo_3") role3._worker_gather(1) role3._worker_gather(1) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py index d786fa1eba8..4dd254af251 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py @@ -30,19 +30,19 @@ class TestRoleMakerBase(unittest.TestCase): def test_rolemaker_base(self): role = role_maker.RoleMakerBase() - self.assertRaises(Exception, role.is_worker) - self.assertRaises(Exception, role.is_server) - self.assertRaises(Exception, role.is_first_worker) - self.assertRaises(Exception, role.worker_num) - self.assertRaises(Exception, role.server_num) - self.assertRaises(Exception, role.worker_index) - self.assertRaises(Exception, role.server_index) - self.assertRaises(Exception, role.role_id) - self.assertRaises(Exception, role.node_num) - - trainer_endpoints = role.get_trainer_endpoints() + self.assertRaises(Exception, role._is_worker) + self.assertRaises(Exception, role._is_server) + self.assertRaises(Exception, role._is_first_worker) + self.assertRaises(Exception, role._worker_num) + self.assertRaises(Exception, role._server_num) + self.assertRaises(Exception, role._worker_index) + self.assertRaises(Exception, role._server_index) + self.assertRaises(Exception, role._role_id) + self.assertRaises(Exception, 
role._node_num) + + trainer_endpoints = role._get_trainer_endpoints() self.assertTrue(len(trainer_endpoints) == 0) - pserver_endpoints = role.get_pserver_endpoints() + pserver_endpoints = role._get_pserver_endpoints() self.assertTrue(len(pserver_endpoints) == 0) print(role.to_string()) @@ -77,20 +77,32 @@ class TestCloudRoleMaker(unittest.TestCase): return ro = role_maker.PaddleCloudRoleMaker(is_collective=False) - - self.assertTrue(ro.is_worker()) - self.assertFalse(ro.is_server()) - self.assertEqual(ro.worker_num(), 2) - self.assertTrue(ro.is_first_worker()) - worker_endpoints = ro.get_trainer_endpoints() + self.assertTrue(ro._is_worker()) + ro = role_maker.PaddleCloudRoleMaker(is_collective=False) + self.assertFalse(ro._is_server()) + ro = role_maker.PaddleCloudRoleMaker(is_collective=False) + self.assertEqual(ro._worker_num(), 2) + ro = role_maker.PaddleCloudRoleMaker(is_collective=False) + self.assertTrue(ro._is_first_worker()) + ro = role_maker.PaddleCloudRoleMaker(is_collective=False) + worker_endpoints = ro._get_trainer_endpoints() self.assertEqual(worker_endpoints[0], '127.0.0.1:36001') - self.assertEqual(ro.role_id(), 0) - self.assertEqual(ro.node_num(), 2) + ro = role_maker.PaddleCloudRoleMaker(is_collective=False) + self.assertEqual(ro._role_id(), 0) + ro = role_maker.PaddleCloudRoleMaker(is_collective=False) + self.assertEqual(ro._node_num(), 2) + ro = role_maker.PaddleCloudRoleMaker(is_collective=False) + self.assertFalse(ro._is_non_distributed()) + ro = role_maker.PaddleCloudRoleMaker(is_collective=False) + self.assertEqual(ro._heter_worker_num(), 0) + ro = role_maker.PaddleCloudRoleMaker(is_collective=False) + self.assertFalse(ro._is_heter_worker()) def test_tr_rolemaker_collective(self): ro = role_maker.PaddleCloudRoleMaker(is_collective=True) - self.assertEqual(ro.worker_num(), 2) - self.assertEqual(ro.node_num(), 2) + self.assertEqual(ro._worker_num(), 2) + ro = role_maker.PaddleCloudRoleMaker(is_collective=True) + 
self.assertEqual(ro._node_num(), 2) def test_ps_rolemaker(self): """Test ps rolemaker.""" @@ -106,11 +118,11 @@ class TestCloudRoleMaker(unittest.TestCase): ro = role_maker.PaddleCloudRoleMaker( is_collective=False, init_gloo=False) - self.assertEqual(ro.server_index(), 0) - self.assertFalse(ro.is_worker()) - self.assertTrue(ro.is_server()) - self.assertEqual(ro.server_num(), 2) - pserver_endpoints = ro.get_pserver_endpoints() + self.assertEqual(ro._server_index(), 0) + self.assertFalse(ro._is_worker()) + self.assertTrue(ro._is_server()) + self.assertEqual(ro._server_num(), 2) + pserver_endpoints = ro._get_pserver_endpoints() self.assertEqual(pserver_endpoints[0], '127.0.0.1:36001') self.assertEqual(ro._all_gather(1, "worker"), 1) @@ -126,7 +138,7 @@ class TestCloudRoleMaker(unittest.TestCase): return ro = role_maker.PaddleCloudRoleMaker(is_collective=False) - self.assertRaises(ValueError, ro.generate_role) + self.assertRaises(ValueError, ro._generate_role) class TestUserDefinedRoleMaker(unittest.TestCase): @@ -151,10 +163,10 @@ class TestUserDefinedRoleMaker(unittest.TestCase): role=role_maker.Role.SERVER, current_id=0, worker_num=2) - self.assertEqual(ro.server_num(), 2) - ro.generate_role() - self.assertTrue(ro.is_server()) - self.assertEqual(ro.role_id(), 0) + self.assertEqual(ro._server_num(), 2) + ro._generate_role() + self.assertTrue(ro._is_server()) + self.assertEqual(ro._role_id(), 0) def test_tr_rolemaker(self): try: @@ -171,9 +183,9 @@ class TestUserDefinedRoleMaker(unittest.TestCase): current_id=0, worker_num=2) - self.assertIn("127.0.0.1:36001", ro.get_pserver_endpoints()) - self.assertTrue(ro.is_worker()) - self.assertEqual(ro.role_id(), 0) + self.assertIn("127.0.0.1:36001", ro._get_pserver_endpoints()) + self.assertTrue(ro._is_worker()) + self.assertEqual(ro._role_id(), 0) class TestGlooWithCloudRoleMaker(unittest.TestCase): @@ -216,7 +228,7 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): os.environ["PADDLE_GLOO_FS_PATH"] = tmp role = 
role_maker.PaddleCloudRoleMaker() - role.generate_role() + role._generate_role() self.case(role, "worker") self.clean(tmp) @@ -234,7 +246,7 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): os.environ["PADDLE_GLOO_FS_PATH"] = tmp role = role_maker.PaddleCloudRoleMaker() - role.generate_role() + role._generate_role() self.case(role, "worker") self.clean(tmp) @@ -256,7 +268,7 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): os.environ["PADDLE_GLOO_FS_PATH"] = tmp role = role_maker.PaddleCloudRoleMaker() - role.generate_role() + role._generate_role() self.case(role, "server") self.clean(tmp) @@ -280,7 +292,7 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): os.environ["PADDLE_GLOO_FS_PATH"] = tmp role = role_maker.PaddleCloudRoleMaker() - role.generate_role() + role._generate_role() self.case(role, "server") self.clean(tmp) @@ -302,7 +314,7 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): os.environ["PADDLE_GLOO_HTTP_PORT"] = "30019" role = role_maker.PaddleCloudRoleMaker() - role.generate_role() + role._generate_role() import time time.sleep(3) @@ -326,7 +338,7 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): os.environ["PADDLE_GLOO_FS_PATH"] = tmp role = role_maker.PaddleCloudRoleMaker() - role.generate_role() + role._generate_role() self.case(role, "server") self.case(role, "all") self.clean(tmp) @@ -354,7 +366,7 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): os.environ["PADDLE_GLOO_FS_PATH"] = tmp role = role_maker.PaddleCloudRoleMaker() - role.generate_role() + role._generate_role() self.case(role, "server") self.case(role, "all") self.clean(tmp) @@ -377,7 +389,323 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): os.environ["PADDLE_GLOO_RENDEZVOUS"] = "5" role = role_maker.PaddleCloudRoleMaker() - self.assertRaises(ValueError, role.generate_role) + self.assertRaises(ValueError, role._generate_role) + + def test_fs_gloo8(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on 
MacOS/Win") + return + + tmp = self.mkdir() + + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + os.environ["PADDLE_TRAINERS_NUM"] = "0" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + + os.environ["PADDLE_WITH_GLOO"] = "2" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "1" + os.environ["PADDLE_GLOO_FS_NAME"] = "NULL" + os.environ["PADDLE_GLOO_FS_UGI"] = "NULL" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + def net(): + x = paddle.fluid.layers.data(name='x', shape=[13], dtype='float32') + y_predict = paddle.fluid.layers.fc(input=x, size=1, act=None) + y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') + cost = paddle.fluid.layers.square_error_cost( + input=y_predict, label=y) + avg_cost = paddle.fluid.layers.mean(cost) + return avg_cost + + from paddle.distributed import fleet + + role = role_maker.PaddleCloudRoleMaker() + fleet.init(role) + avg_cost = net() + + strategy = paddle.distributed.fleet.DistributedStrategy() + strategy.a_sync = False + + optimizer = paddle.optimizer.SGD(0.01) + optimizer = fleet.distributed_optimizer(optimizer, strategy) + optimizer.minimize(avg_cost) + + comm_world = "server" + fleet.util().barrier(comm_world) + + gather = fleet.util().all_gather(1, comm_world) + self.assertEqual(gather[0], 1) + + all_reduce = fleet.util().all_reduce(1, "sum", comm_world) + self.assertEqual(1, all_reduce) + + self.clean(tmp) + + +class TestGlooWithCloudRoleMaker(unittest.TestCase): + def setUp(self): + os.environ["PADDLE_TRAINERS_NUM"] = "1" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_TRAINER_ID"] = "0" + + def case(self, role, comm_world): + role._barrier(comm_world) + + gather = role._all_gather(1, comm_world) + self.assertEqual(gather[0], 1) + + all_reduce = 
role._all_reduce(1, "sum", comm_world) + self.assertEqual(1, all_reduce) + + def mkdir(self): + tmp = tempfile.mkdtemp() + return tmp + + def clean(self, tmp): + shutil.rmtree(tmp) + + def test_hdfs_gloo(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + tmp = self.mkdir() + os.environ["TRAINING_ROLE"] = "TRAINER" + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "1" + os.environ["PADDLE_GLOO_FS_NAME"] = "NULL" + os.environ["PADDLE_GLOO_FS_UGI"] = "NULL" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + role = role_maker.PaddleCloudRoleMaker() + role._generate_role() + self.case(role, "worker") + self.clean(tmp) + + def test_fs_gloo(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + tmp = self.mkdir() + os.environ["TRAINING_ROLE"] = "TRAINER" + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "2" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + role = role_maker.PaddleCloudRoleMaker() + role._generate_role() + self.case(role, "worker") + self.clean(tmp) + + def test_fs_gloo2(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + tmp = self.mkdir() + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "2" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + role = role_maker.PaddleCloudRoleMaker() + role._generate_role() + self.case(role, "server") + self.clean(tmp) + + def test_fs_gloo3(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + 
return + + tmp = self.mkdir() + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "1" + os.environ["PADDLE_GLOO_FS_NAME"] = "NULL" + os.environ["PADDLE_GLOO_FS_UGI"] = "NULL" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + role = role_maker.PaddleCloudRoleMaker() + role._generate_role() + self.case(role, "server") + self.clean(tmp) + + def test_fs_gloo4(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "3" + os.environ["PADDLE_GLOO_HTTP_HOST"] = "127.0.0.1" + os.environ["PADDLE_GLOO_HTTP_PORT"] = "30019" + + role = role_maker.PaddleCloudRoleMaker() + role._generate_role() + import time + time.sleep(3) + + def test_fs_gloo5(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + tmp = self.mkdir() + + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + os.environ["PADDLE_TRAINERS_NUM"] = "0" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "2" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "2" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + role = role_maker.PaddleCloudRoleMaker() + role._generate_role() + self.case(role, "server") + self.case(role, "all") + self.clean(tmp) + + def test_fs_gloo6(self): + plats = platform.platform() + 
if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + tmp = self.mkdir() + + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + os.environ["PADDLE_TRAINERS_NUM"] = "0" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + + os.environ["PADDLE_WITH_GLOO"] = "2" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "1" + os.environ["PADDLE_GLOO_FS_NAME"] = "NULL" + os.environ["PADDLE_GLOO_FS_UGI"] = "NULL" + os.environ["PADDLE_GLOO_FS_PATH"] = tmp + + role = role_maker.PaddleCloudRoleMaker() + role._generate_role() + self.case(role, "server") + self.case(role, "all") + self.clean(tmp) + + def test_fs_gloo7(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + os.environ["PADDLE_TRAINERS_NUM"] = "0" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "5" + + role = role_maker.PaddleCloudRoleMaker() + self.assertRaises(ValueError, role._generate_role) + + def test_hdfs_gloo_v2(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + os.environ["TRAINING_ROLE"] = "TRAINER" + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "1" + os.environ["PADDLE_GLOO_FS_NAME"] = "" + os.environ["PADDLE_GLOO_FS_UGI"] = "" + os.environ["PADDLE_GLOO_FS_PATH"] = "" + + role = role_maker.PaddleCloudRoleMaker() + self.assertRaises(ValueError, role._generate_role) + + def test_fs_gloo_v2(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + 
os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + os.environ["PADDLE_TRAINERS_NUM"] = "0" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "2" + os.environ["PADDLE_GLOO_FS_PATH"] = "" + + role = role_maker.PaddleCloudRoleMaker() + self.assertRaises(ValueError, role._generate_role) + + def test_http_gloo_v2(self): + plats = platform.platform() + if 'Linux' not in plats: + print("skip gloo UT on MacOS/Win") + return + + os.environ["TRAINING_ROLE"] = "PSERVER" + os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36001" + os.environ["POD_IP"] = "127.0.0.1" + os.environ["PADDLE_PORT"] = "36001" + + os.environ["SYS_JOB_ID"] = "gloo_for_cluster" + os.environ["PADDLE_WITH_GLOO"] = "1" + os.environ["PADDLE_GLOO_RENDEZVOUS"] = "3" + os.environ["PADDLE_GLOO_HTTP_HOST"] = "" + os.environ["PADDLE_GLOO_HTTP_PORT"] = "" + + role = role_maker.PaddleCloudRoleMaker() + self.assertRaises(ValueError, role._generate_role) def test_fs_gloo8(self): plats = platform.platform() -- GitLab From f936adbd2d9e2a34dd4797ef1769e2c38e8cfae2 Mon Sep 17 00:00:00 2001 From: MRXLT Date: Mon, 21 Sep 2020 11:16:34 +0800 Subject: [PATCH 011/117] fix adam (#27343) * fix adam * rmsprop support double --- paddle/fluid/operators/optimizers/rmsprop_op.cc | 3 ++- paddle/fluid/operators/optimizers/rmsprop_op.cu | 3 ++- python/paddle/optimizer/adam.py | 11 +++++------ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/paddle/fluid/operators/optimizers/rmsprop_op.cc b/paddle/fluid/operators/optimizers/rmsprop_op.cc index 99d1156ee6d..eeee008cdc5 100644 --- a/paddle/fluid/operators/optimizers/rmsprop_op.cc +++ b/paddle/fluid/operators/optimizers/rmsprop_op.cc @@ -143,4 +143,5 @@ http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) namespace ops = paddle::operators; 
REGISTER_OP_WITHOUT_GRADIENT(rmsprop, ops::RmspropOp, ops::RmspropOpMaker); REGISTER_OP_CPU_KERNEL( - rmsprop, ops::RmspropOpKernel); + rmsprop, ops::RmspropOpKernel, + ops::RmspropOpKernel); diff --git a/paddle/fluid/operators/optimizers/rmsprop_op.cu b/paddle/fluid/operators/optimizers/rmsprop_op.cu index 8b17d6a0204..bf11ee68675 100644 --- a/paddle/fluid/operators/optimizers/rmsprop_op.cu +++ b/paddle/fluid/operators/optimizers/rmsprop_op.cu @@ -15,4 +15,5 @@ limitations under the License. */ namespace ops = paddle::operators; REGISTER_OP_CUDA_KERNEL( - rmsprop, ops::RmspropOpKernel); + rmsprop, ops::RmspropOpKernel, + ops::RmspropOpKernel); diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py index 708aaa788f6..24cebf8e6e6 100644 --- a/python/paddle/optimizer/adam.py +++ b/python/paddle/optimizer/adam.py @@ -282,14 +282,13 @@ class Adam(Optimizer): for param in self._parameter_list: if not param.trainable: continue - if hasattr( - param, "_is_sparse" - ) and param._is_sparse and self.regularization is not None: - raise RuntimeError( - "Adam don't support weight_decay with sparse parameters, please set it to None." - ) if param._grad_ivar() is not None: grad_var = param._grad_ivar() + if hasattr(grad_var, "_is_sparse") and grad_var._is_sparse( + ) and self.regularization is not None: + raise RuntimeError( + "Adam don't support weight_decay with sparse parameters, please set it to None." + ) params_grads.append((param, grad_var)) optimize_ops = self._apply_optimize( -- GitLab From f3b4a64addfdae3f8c8f56ac919dc1e1ed1be229 Mon Sep 17 00:00:00 2001 From: Kaipeng Deng Date: Mon, 21 Sep 2020 11:23:46 +0800 Subject: [PATCH 012/117] fix CIFAR MNIST UCIHousing dataset. test=develop (#27368) * fix CIFAR & MNIST dataset. 
test=develop --- python/paddle/tests/test_dataset_cifar.py | 24 ++++++++++++++-------- python/paddle/tests/test_datasets.py | 6 ++++-- python/paddle/text/datasets/uci_housing.py | 6 +++++- python/paddle/vision/datasets/cifar.py | 1 + python/paddle/vision/datasets/mnist.py | 11 +--------- 5 files changed, 27 insertions(+), 21 deletions(-) diff --git a/python/paddle/tests/test_dataset_cifar.py b/python/paddle/tests/test_dataset_cifar.py index 2ecc41c3f0a..672de7ae8e9 100644 --- a/python/paddle/tests/test_dataset_cifar.py +++ b/python/paddle/tests/test_dataset_cifar.py @@ -27,8 +27,10 @@ class TestCifar10Train(unittest.TestCase): # long time, randomly check 1 sample idx = np.random.randint(0, 50000) data, label = cifar[idx] - self.assertTrue(len(data.shape) == 1) - self.assertTrue(data.shape[0] == 3072) + self.assertTrue(len(data.shape) == 3) + self.assertTrue(data.shape[0] == 3) + self.assertTrue(data.shape[1] == 32) + self.assertTrue(data.shape[2] == 32) self.assertTrue(0 <= int(label) <= 9) @@ -41,8 +43,10 @@ class TestCifar10Test(unittest.TestCase): # long time, randomly check 1 sample idx = np.random.randint(0, 10000) data, label = cifar[idx] - self.assertTrue(len(data.shape) == 1) - self.assertTrue(data.shape[0] == 3072) + self.assertTrue(len(data.shape) == 3) + self.assertTrue(data.shape[0] == 3) + self.assertTrue(data.shape[1] == 32) + self.assertTrue(data.shape[2] == 32) self.assertTrue(0 <= int(label) <= 9) @@ -55,8 +59,10 @@ class TestCifar100Train(unittest.TestCase): # long time, randomly check 1 sample idx = np.random.randint(0, 50000) data, label = cifar[idx] - self.assertTrue(len(data.shape) == 1) - self.assertTrue(data.shape[0] == 3072) + self.assertTrue(len(data.shape) == 3) + self.assertTrue(data.shape[0] == 3) + self.assertTrue(data.shape[1] == 32) + self.assertTrue(data.shape[2] == 32) self.assertTrue(0 <= int(label) <= 99) @@ -69,8 +75,10 @@ class TestCifar100Test(unittest.TestCase): # long time, randomly check 1 sample idx = np.random.randint(0, 
10000) data, label = cifar[idx] - self.assertTrue(len(data.shape) == 1) - self.assertTrue(data.shape[0] == 3072) + self.assertTrue(len(data.shape) == 3) + self.assertTrue(data.shape[0] == 3) + self.assertTrue(data.shape[1] == 32) + self.assertTrue(data.shape[2] == 32) self.assertTrue(0 <= int(label) <= 99) diff --git a/python/paddle/tests/test_datasets.py b/python/paddle/tests/test_datasets.py index 1e50ff60aa5..1e0d6dbacf6 100644 --- a/python/paddle/tests/test_datasets.py +++ b/python/paddle/tests/test_datasets.py @@ -103,12 +103,14 @@ class TestMNISTTest(unittest.TestCase): class TestMNISTTrain(unittest.TestCase): def test_main(self): - mnist = MNIST(mode='train', chw_format=False) + mnist = MNIST(mode='train') self.assertTrue(len(mnist) == 60000) for i in range(len(mnist)): image, label = mnist[i] - self.assertTrue(image.shape[0] == 784) + self.assertTrue(image.shape[0] == 1) + self.assertTrue(image.shape[1] == 28) + self.assertTrue(image.shape[2] == 28) self.assertTrue(label.shape[0] == 1) self.assertTrue(0 <= int(label) <= 9) diff --git a/python/paddle/text/datasets/uci_housing.py b/python/paddle/text/datasets/uci_housing.py index a0d465eb177..a8dfbc44a97 100644 --- a/python/paddle/text/datasets/uci_housing.py +++ b/python/paddle/text/datasets/uci_housing.py @@ -17,6 +17,7 @@ from __future__ import print_function import six import numpy as np +import paddle from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download @@ -88,6 +89,8 @@ class UCIHousing(Dataset): # read dataset into memory self._load_data() + self.dtype = paddle.get_default_dtype() + def _load_data(self, feature_num=14, ratio=0.8): data = np.fromfile(self.data_file, sep=' ') data = data.reshape(data.shape[0] // feature_num, feature_num) @@ -103,7 +106,8 @@ class UCIHousing(Dataset): def __getitem__(self, idx): data = self.data[idx] - return np.array(data[:-1]), np.array(data[-1:]) + return np.array(data[:-1]).astype(self.dtype), \ + 
np.array(data[-1:]).astype(self.dtype) def __len__(self): return len(self.data) diff --git a/python/paddle/vision/datasets/cifar.py b/python/paddle/vision/datasets/cifar.py index 1193be26da5..631892ee4dc 100644 --- a/python/paddle/vision/datasets/cifar.py +++ b/python/paddle/vision/datasets/cifar.py @@ -139,6 +139,7 @@ class Cifar10(Dataset): def __getitem__(self, idx): image, label = self.data[idx] + image = np.reshape(image, [3, 32, 32]) if self.transform is not None: image = self.transform(image) return image, label diff --git a/python/paddle/vision/datasets/mnist.py b/python/paddle/vision/datasets/mnist.py index a9856133392..597d4046441 100644 --- a/python/paddle/vision/datasets/mnist.py +++ b/python/paddle/vision/datasets/mnist.py @@ -44,8 +44,6 @@ class MNIST(Dataset): :attr:`download` is True. Default None label_path(str): path to label file, can be set None if :attr:`download` is True. Default None - chw_format(bool): If set True, the output shape is [1, 28, 28], - otherwise, output shape is [1, 784]. Default True. mode(str): 'train' or 'test' mode. Default 'train'. download(bool): whether to download dataset automatically if :attr:`image_path` :attr:`label_path` is not set. 
Default True @@ -70,14 +68,12 @@ class MNIST(Dataset): def __init__(self, image_path=None, label_path=None, - chw_format=True, mode='train', transform=None, download=True): assert mode.lower() in ['train', 'test'], \ "mode should be 'train' or 'test', but got {}".format(mode) self.mode = mode.lower() - self.chw_format = chw_format self.image_path = image_path if self.image_path is None: assert download, "image_path is not set and downloading automatically is disabled" @@ -139,10 +135,6 @@ class MNIST(Dataset): cols)).astype('float32') offset_img += struct.calcsize(fmt_images) - images = images / 255.0 - images = images * 2.0 - images = images - 1.0 - for i in range(buffer_size): self.images.append(images[i, :]) self.labels.append( @@ -150,8 +142,7 @@ class MNIST(Dataset): def __getitem__(self, idx): image, label = self.images[idx], self.labels[idx] - if self.chw_format: - image = np.reshape(image, [1, 28, 28]) + image = np.reshape(image, [1, 28, 28]) if self.transform is not None: image = self.transform(image) return image, label -- GitLab From bbc84e0fe0f4401c4a087f74fdf24863b4157b4d Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Mon, 21 Sep 2020 13:28:08 +0800 Subject: [PATCH 013/117] Refine error msg in paddle/fluid/framework/details [part 1] (#25631) * refine error msg in var_handle.h, test=develop * refine all_reduce_op_handle * fix some error msg * refine variable_visitor * refine threaded_ssa_graph_executor * refine inplace related files * refine executor related files * refine fetch_op_handle.cc * fix bug * follow comments --- .../framework/details/all_reduce_op_handle.cc | 82 ++++++++++++++----- .../details/async_ssa_graph_executor.cc | 15 +++- .../fast_threaded_ssa_graph_executor.cc | 6 +- .../framework/details/fetch_op_handle.cc | 8 +- .../details/parallel_ssa_graph_executor.cc | 9 +- .../scope_buffered_ssa_graph_executor.cc | 10 ++- .../details/share_tensor_buffer_functor.cc | 58 ++++++++++--- .../details/share_tensor_buffer_op_handle.cc | 16 +++- 
.../framework/details/ssa_graph_executor.cc | 6 +- .../details/threaded_ssa_graph_executor.cc | 21 +++-- .../details/threaded_ssa_graph_executor.h | 4 +- paddle/fluid/framework/details/var_handle.h | 11 ++- .../framework/details/variable_visitor.cc | 71 ++++++++++------ 13 files changed, 237 insertions(+), 80 deletions(-) diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.cc b/paddle/fluid/framework/details/all_reduce_op_handle.cc index 7fc08f3e0f2..939a2fc8fc9 100644 --- a/paddle/fluid/framework/details/all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/all_reduce_op_handle.cc @@ -12,7 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/fluid/framework/details/all_reduce_op_handle.h" + #include + #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/reduce_and_gather.h" #include "paddle/fluid/framework/details/variable_visitor.h" @@ -34,14 +36,24 @@ AllReduceOpHandle::AllReduceOpHandle(ir::Node *node, const std::vector &places, const platform::NCCLCommunicator *ctxs) : NCCLOpHandleBase(node, places, ctxs), local_scopes_(local_scopes) { - PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size()); + PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size(), + platform::errors::InvalidArgument( + "The number of places and the number of local scopes " + "should be equal, but got number of places is %d and " + "number of local scopes is %d.", + places_.size(), local_scopes_.size())); } #else AllReduceOpHandle::AllReduceOpHandle(ir::Node *node, const std::vector &local_scopes, const std::vector &places) : OpHandleBase(node), local_scopes_(local_scopes), places_(places) { - PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size()); + PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size(), + platform::errors::InvalidArgument( + "The number of places and the number of local scopes " + "should be equal, but got number of places is %d 
and " + "number of local scopes is %d.", + places_.size(), local_scopes_.size())); } #endif @@ -60,13 +72,25 @@ void AllReduceOpHandle::AllReduceImpl( const std::vector &in_var_handles, const std::vector &out_var_handles) { size_t num_places = places_.size(); - PADDLE_ENFORCE_EQ( - in_var_handles.size(), num_places, - "The NoDummyInputSize should be equal to the number of places."); + PADDLE_ENFORCE_EQ(in_var_handles.size(), num_places, + platform::errors::InvalidArgument( + "The NoDummyInputSize should be equal " + "to the number of places, but got NoDummyInputSize is " + "%d and the number of places is %d.", + in_var_handles.size(), num_places)); PADDLE_ENFORCE_EQ( in_var_handles.size(), out_var_handles.size(), - "The NoDummyInputSize and NoDummyOutputSize should be equal."); - PADDLE_ENFORCE_EQ(local_exec_scopes_.size(), num_places); + platform::errors::InvalidArgument( + "The NoDummyInputSize and NoDummyOutputSize should be " + "equal, but got NoDummyInputSize is %d and NoDummyOutputSize is %d.", + in_var_handles.size(), out_var_handles.size())); + PADDLE_ENFORCE_EQ( + local_exec_scopes_.size(), num_places, + platform::errors::InvalidArgument( + "The number of local scopes should be equal " + "to the number of places, but got the number of local scopes is " + "%d and the number of places is %d.", + local_exec_scopes_.size(), num_places)); std::vector lod_tensor_data; std::vector places; @@ -78,23 +102,36 @@ void AllReduceOpHandle::AllReduceImpl( for (size_t i = 0; i < local_exec_scopes_.size(); ++i) { auto &local_scope = local_exec_scopes_[i]; auto var = local_scope->FindVar(in_var_handles[i]->name()); - PADDLE_ENFORCE_NOT_NULL(var, "%s is not found int scope.", - in_var_handles[i]->name()); + PADDLE_ENFORCE_NOT_NULL(var, platform::errors::NotFound( + "Variable %s is not found in local scope.", + in_var_handles[i]->name())); auto &lod_tensor = var->Get(); if (i == 0) { numel = static_cast(lod_tensor.numel()); // only enforce place0, we will enforce other palce numel
== place0 numel PADDLE_ENFORCE_GT( - numel, 0, platform::errors::InvalidArgument( - "The numel of tensos=[%s] must > 0. But now numel=[%d]", - in_var_handles[i]->name(), numel)); + numel, 0, + platform::errors::PreconditionNotMet( + "The numel of tensor %s should be > 0, but got numel is %d.", + in_var_handles[i]->name(), numel)); dtype = lod_tensor.type(); is_gpu_place = platform::is_gpu_place(lod_tensor.place()); } - PADDLE_ENFORCE_EQ(numel, static_cast(lod_tensor.numel())); - PADDLE_ENFORCE_EQ(dtype, lod_tensor.type()); - PADDLE_ENFORCE_EQ(is_gpu_place, platform::is_gpu_place(lod_tensor.place())); + PADDLE_ENFORCE_EQ( + numel, static_cast(lod_tensor.numel()), + platform::errors::PreconditionNotMet( + "The size of tensors of the same variable in different local " + "scopes should be equal.")); + PADDLE_ENFORCE_EQ( + dtype, lod_tensor.type(), + platform::errors::PreconditionNotMet( + "The dtype of tensors of the same variable in different local " + "scopes should be equal.")); + PADDLE_ENFORCE_EQ(is_gpu_place, platform::is_gpu_place(lod_tensor.place()), + platform::errors::PreconditionNotMet( + "The place type of tensors of the same variable " + "in different local scopes should be equal.")); lod_tensor_data.emplace_back(lod_tensor.data()); places.emplace_back(lod_tensor.place()); @@ -102,8 +139,12 @@ void AllReduceOpHandle::AllReduceImpl( VLOG(10) << "place:" << i << ", input_name:" << in_var_handles[i]->name() << ", out_name:" << out_var_handles[i]->name(); - PADDLE_ENFORCE_EQ(in_var_handles[i]->name(), out_var_handles[i]->name(), - "The name of input and output should be equal."); + PADDLE_ENFORCE_EQ( + in_var_handles[i]->name(), out_var_handles[i]->name(), + platform::errors::InvalidArgument( + "The name of input and output of all_reduce op should be equal, " + "but got input is %s and output is %s.", + in_var_handles[i]->name(), out_var_handles[i]->name())); } std::vector grad_var_names; @@ -122,7 +163,9 @@ void AllReduceOpHandle::AllReduceFunc( const 
std::vector &out_var_names) { if (is_gpu_place(places[0])) { #if defined(PADDLE_WITH_NCCL) - PADDLE_ENFORCE_NOT_NULL(nccl_ctxs_, "nccl_ctxs should not be nullptr."); + PADDLE_ENFORCE_NOT_NULL(nccl_ctxs_, + platform::errors::InvalidArgument( + "The nccl context should not be NULL.")); ncclDataType_t nccl_dtype = platform::ToNCCLDataType(dtype); std::vector> all_reduce_calls; for (size_t i = 0; i < local_exec_scopes_.size(); ++i) { @@ -134,7 +177,8 @@ void AllReduceOpHandle::AllReduceFunc( } NCCLAllReduceFunc(all_reduce_calls); #else - PADDLE_THROW("Not compiled with CUDA."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with CUDA.")); #endif } else { // Special handle CPU only Operator's gradient. Like CRF auto &trg = *local_exec_scopes_[0] diff --git a/paddle/fluid/framework/details/async_ssa_graph_executor.cc b/paddle/fluid/framework/details/async_ssa_graph_executor.cc index d42bd0b16d7..12c0d674902 100644 --- a/paddle/fluid/framework/details/async_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/async_ssa_graph_executor.cc @@ -89,8 +89,19 @@ AsyncSSAGraphExecutor::AsyncSSAGraphExecutor( places_(std::move(places)), graphs_(std::move(graphs)) { VLOG(3) << "build AsyncSSAGraphExecutor"; - PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size()); - PADDLE_ENFORCE_EQ(local_scopes_.size(), local_exec_scopes_.size()); + PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size(), + platform::errors::InvalidArgument( + "The number of places and the number of local scopes " + "should be equal, but got number of places is %d and " + "number of local scopes is %d.", + places_.size(), local_scopes_.size())); + PADDLE_ENFORCE_EQ( + local_scopes_.size(), local_exec_scopes_.size(), + platform::errors::InvalidArgument( + "The number of local scopes and the number of local execution scopes " + "should be equal, but got number of local scopes is %d and " + "number of local execution scopes is %d.", + local_scopes_.size(), local_exec_scopes_.size())); // 
set the correct size of thread pool to each device. strategy_.num_threads_ = strategy_.num_threads_ < places_.size() diff --git a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc index e440dff2af6..7f1d3c9b340 100644 --- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc @@ -12,12 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h" + #include #include #include #include #include #include + #include "paddle/fluid/framework/details/computation_op_handle.h" #include "paddle/fluid/framework/details/fetch_async_op_handle.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" @@ -48,7 +50,9 @@ FastThreadedSSAGraphExecutor::FastThreadedSSAGraphExecutor( bootstrap_ops_.emplace_back(op); } } - PADDLE_ENFORCE_GT(op_deps_.size(), 0, "The graph doesn't have operators."); + PADDLE_ENFORCE_GT(op_deps_.size(), 0, + platform::errors::PreconditionNotMet( + "The graph doesn't have operators.")); PrepareAtomicOpDeps(); } diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc index ae69960ef78..aedb8db46a5 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_op_handle.cc @@ -13,9 +13,11 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/fetch_op_handle.h" + #include #include #include + #include "paddle/fluid/platform/profiler.h" namespace paddle { @@ -138,8 +140,10 @@ void FetchOpHandle::RunImpl() { auto *var_handle = static_cast(inputs_[i]); auto &scope = scopes.at(var_handle->scope_idx()); auto *var = scope->FindVar(var_handle->name()); - PADDLE_ENFORCE_NOT_NULL(var, "Cannot find variable %s in execution scope", - var_handle->name()); + PADDLE_ENFORCE_NOT_NULL( + var, + platform::errors::NotFound( + "Cannot find variable %s in execution scope.", var_handle->name())); if (var->IsType()) { auto &t = var->Get(); diff --git a/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc b/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc index e7d466c4af0..35834fe5d74 100644 --- a/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc @@ -13,9 +13,11 @@ // limitations under the License. #include "paddle/fluid/framework/details/parallel_ssa_graph_executor.h" + #include #include #include + #include "paddle/fluid/framework/ir/graph_helper.h" namespace paddle { @@ -104,7 +106,12 @@ ParallelSSAGraphExecutor::ParallelSSAGraphExecutor( places_(places), graphs_(std::move(graphs)), feed_status_(places.size(), FeedStatus::kNone) { - PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size()); + PADDLE_ENFORCE_EQ(places_.size(), local_scopes_.size(), + platform::errors::InvalidArgument( + "The number of places and the number of local scopes " + "should be equal, but got number of places is %d and " + "number of local scopes is %d.", + places_.size(), local_scopes_.size())); PADDLE_ENFORCE_EQ(places_.size(), graphs_.size(), platform::errors::InvalidArgument( diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc index fe86d002ca8..7cc1f541314 100644 --- 
a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc @@ -13,10 +13,12 @@ // limitations under the License. #include "paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h" + #include #include #include #include + #include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/variable_helper.h" @@ -37,7 +39,13 @@ ScopeBufferedSSAGraphExecutor::ScopeBufferedSSAGraphExecutor( var_infos_(std::move(var_infos)), places_(std::move(places)), scope_monitor_(places_, local_exec_scopes_) { - PADDLE_ENFORCE_EQ(local_scopes_.size(), local_exec_scopes_.size()); + PADDLE_ENFORCE_EQ( + local_scopes_.size(), local_exec_scopes_.size(), + platform::errors::InvalidArgument( + "The number of local scopes and the number of local execution scopes " + "should be equal, but got number of local scopes is %d and " + "number of local execution scopes is %d.", + local_scopes_.size(), local_exec_scopes_.size())); PrepareLocalExeScopes(); } diff --git a/paddle/fluid/framework/details/share_tensor_buffer_functor.cc b/paddle/fluid/framework/details/share_tensor_buffer_functor.cc index 6fdec553f3d..19f075018ce 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_functor.cc +++ b/paddle/fluid/framework/details/share_tensor_buffer_functor.cc @@ -13,9 +13,11 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/share_tensor_buffer_functor.h" + #include #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/platform/enforce.h" @@ -29,7 +31,8 @@ static inline const Tensor &GetTensorFromVar(const Variable *var) { if (var->IsType()) { return var->Get(); } else { - PADDLE_THROW("Variable must be type of LoDTensor"); + PADDLE_THROW(platform::errors::InvalidArgument( + "Variable must be type of LoDTensor.")); } } @@ -37,7 +40,8 @@ static inline Tensor *GetMutableTensorFromVar(Variable *var) { if (var->IsType()) { return var->GetMutable(); } else { - PADDLE_THROW("Variable must be type of LoDTensor"); + PADDLE_THROW(platform::errors::InvalidArgument( + "Variable must be type of LoDTensor.")); } } @@ -50,7 +54,12 @@ ShareTensorBufferFunctor::ShareTensorBufferFunctor( op_type_(op_type), in_var_infos_(in_var_infos), out_var_names_(out_var_names) { - PADDLE_ENFORCE_EQ(in_var_infos_.size(), out_var_names_.size()); + PADDLE_ENFORCE_EQ(in_var_infos_.size(), out_var_names_.size(), + platform::errors::PreconditionNotMet( + "The number of input variables and output variables " + "should be equal, but got number of input variables is " + "%d and number of output variables is %d.", + in_var_infos_.size(), out_var_names_.size())); for (size_t i = 0; i < in_var_infos_.size(); ++i) { AddReuseVarPair(in_var_infos_[i], out_var_names_[i]); } @@ -67,32 +76,59 @@ ShareTensorBufferFunctor::ReusedVars() const { void ShareTensorBufferFunctor::AddReuseVarPair( const ir::MemOptVarInfo *in_var_info, const std::string &out_var_name) { - PADDLE_ENFORCE_NOT_NULL(in_var_info, "in_var_info cannot be nullptr"); + PADDLE_ENFORCE_NOT_NULL( + in_var_info, + platform::errors::InvalidArgument( + "The input variables to be inplaced should not be NULL.")); PADDLE_ENFORCE_NE(in_var_info->Name(), out_var_name, - "in/out cannot have same name: %s", out_var_name); + 
platform::errors::InvalidArgument( + "The input variable and output variable to be inplaced " + "cannot have the same name: %s.", + out_var_name)); in_var_infos_.emplace_back(in_var_info); out_var_names_.emplace_back(out_var_name); } void ShareTensorBufferFunctor::CallOnce() { - PADDLE_ENFORCE(in_out_vars_.empty(), "in_out_vars_ must be initialized here"); + PADDLE_ENFORCE(in_out_vars_.empty(), + platform::errors::InvalidArgument( + "The input-output variable pairs to be " + "inplaced should be initialized here.")); for (size_t i = 0; i < in_var_infos_.size(); ++i) { auto *in_var = exec_scope_->FindVar(in_var_infos_[i]->Name()); auto *out_var = exec_scope_->FindVar(out_var_names_[i]); - PADDLE_ENFORCE_NOT_NULL(in_var); - PADDLE_ENFORCE_NOT_NULL(out_var); - PADDLE_ENFORCE_NE(in_var, out_var); + PADDLE_ENFORCE_NOT_NULL( + in_var, platform::errors::NotFound( + "The input variable(%s) to be inplaced should not be NULL.", + in_var_infos_[i]->Name())); + PADDLE_ENFORCE_NOT_NULL( + out_var, + platform::errors::NotFound( + "The output variable(%s) to be inplaced should not be NULL.", + out_var_names_[i])); + PADDLE_ENFORCE_NE( + in_var, out_var, + platform::errors::PreconditionNotMet( + "The input variable and output variable to be inplaced " + "cannot be the same variable(%s).", + out_var_names_[i])); in_out_vars_.emplace_back(in_var, out_var); } } void ShareTensorBufferFunctor::operator()(Scope *exec_scope) { if (!exec_scope_) { - PADDLE_ENFORCE_NOT_NULL(exec_scope); + PADDLE_ENFORCE_NOT_NULL(exec_scope, + platform::errors::InvalidArgument( + "The given execution scope should not be NULL " + "if the cached scope is NULL.")); exec_scope_ = exec_scope; CallOnce(); } else { - PADDLE_ENFORCE(exec_scope_ == exec_scope, "Scope must be the same"); + PADDLE_ENFORCE_EQ(exec_scope_, exec_scope, + platform::errors::InvalidArgument( + "The given execution scope and the cached execution " + "scope should be the same.")); } for (size_t i = 0; i < in_var_infos_.size(); ++i) { diff --git
a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc index f06507257f1..b805ad3b072 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc +++ b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc @@ -13,8 +13,10 @@ // limitations under the License. #include "paddle/fluid/framework/details/share_tensor_buffer_op_handle.h" + #include #include + #include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" @@ -32,17 +34,25 @@ ComputationOpHandle *GetUniquePendingComputationOpHandle( for (ir::Node *pending_op : out_var->outputs) { auto &op = pending_op->Wrapper(); auto *compute_op = dynamic_cast(&op); - PADDLE_ENFORCE_NOT_NULL(compute_op); + PADDLE_ENFORCE_NOT_NULL( + compute_op, + platform::errors::PreconditionNotMet( + "The pending OpHandle should be ComputationOpHandle.")); if (result_op == nullptr) { result_op = compute_op; } else { - PADDLE_ENFORCE_EQ(result_op, compute_op); + PADDLE_ENFORCE_EQ( + result_op, compute_op, + platform::errors::PreconditionNotMet( + "The pending OpHandle should be the unique one.")); } } } - PADDLE_ENFORCE_NOT_NULL(result_op); + PADDLE_ENFORCE_NOT_NULL(result_op, + platform::errors::PreconditionNotMet( + "The pending OpHandle should not be NULL.")); return result_op; } diff --git a/paddle/fluid/framework/details/ssa_graph_executor.cc b/paddle/fluid/framework/details/ssa_graph_executor.cc index 71123f708e3..2723a46dcfa 100644 --- a/paddle/fluid/framework/details/ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/ssa_graph_executor.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/ssa_graph_executor.h" + #include "paddle/fluid/framework/details/fetch_async_op_handle.h" namespace paddle { @@ -27,8 +28,9 @@ void ClearFetchOp(ir::Graph* graph, std::vector* fetch_ops) { PADDLE_ENFORCE_EQ(dynamic_cast(op) != nullptr || dynamic_cast(op) != nullptr, true, - "The input ops of ClearFetchOp function should be " - "FetchOpHandle or FetchAsyncOpHandle."); + platform::errors::PreconditionNotMet( + "The input ops of ClearFetchOp function should be " + "FetchOpHandle or FetchAsyncOpHandle.")); for (auto& out_var : op->Node()->outputs) { graph->RemoveNode(out_var); } diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index 92c3a0cd6b9..2ed52b3bd94 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h" + #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/platform/profiler.h" @@ -138,7 +139,10 @@ inline FetchResultType ThreadedSSAGraphExecutor::RunImpl( } } } - PADDLE_ENFORCE(ready_ops.empty()); + PADDLE_ENFORCE_EQ( + ready_ops.empty(), true, + platform::errors::Fatal("After the execution of computation graph, " + "there are unexecuted operators left.")); } // Wait FetchOps. 
@@ -165,9 +169,8 @@ void ThreadedSSAGraphExecutor::InsertFetchOps( FetchResultType *fetch_data, bool return_merged) { std::unordered_map> fetched_vars; std::unordered_set local_ready_vars; - std::unordered_set fetch_tensor_set(fetch_tensors.begin(), - fetch_tensors.end()); - for (auto &fetch_var_name : fetch_tensor_set) { + + for (auto &fetch_var_name : fetch_tensors) { for (auto &var_map : graph_->Get(details::kGraphVars)) { auto it = var_map.find(fetch_var_name); if (it != var_map.end()) { @@ -231,7 +234,11 @@ void ThreadedSSAGraphExecutor::InsertFetchOps( ready_ops->insert(static_cast(op)); } } - PADDLE_ENFORCE_EQ(local_ready_vars.size(), 0); + PADDLE_ENFORCE_EQ( + local_ready_vars.size(), 0, + platform::errors::Fatal( + "The number of ready variables should be 0, but got %d.", + local_ready_vars.size())); } void ThreadedSSAGraphExecutor::InsertPendingOp( @@ -277,7 +284,9 @@ void ThreadedSSAGraphExecutor::PrepareOpDeps() { } } op_deps_->num_ops_ = ready_ops.size() + pending_ops.size(); - PADDLE_ENFORCE_GT(op_deps_->num_ops_, 0, "The graph doesn't have operators."); + PADDLE_ENFORCE_GT( + op_deps_->num_ops_, 0, + platform::errors::InvalidArgument("The graph doesn't have operators.")); for (auto ready_var : ready_vars) { pending_vars.erase(ready_var); diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h index b8b584f2720..45fa3adbf14 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h @@ -14,6 +14,8 @@ #pragma once +#include // ThreadPool in thrird party + #include #include #include @@ -24,8 +26,6 @@ #include #include -#include // ThreadPool in thrird party - #include "paddle/fluid/framework/blocking_queue.h" #include "paddle/fluid/framework/details/exception_holder.h" #include "paddle/fluid/framework/details/execution_strategy.h" diff --git a/paddle/fluid/framework/details/var_handle.h 
b/paddle/fluid/framework/details/var_handle.h index 86428f8b761..bb38424d3ae 100644 --- a/paddle/fluid/framework/details/var_handle.h +++ b/paddle/fluid/framework/details/var_handle.h @@ -54,8 +54,10 @@ struct VarHandleBase { void AddOutput(OpHandleBase* out, ir::Node* node) { if (pending_ops_.find(out) == pending_ops_.end()) { - PADDLE_ENFORCE(out != nullptr, "The output of %s should not be nullptr", - this->Node()->Name()); + PADDLE_ENFORCE_NOT_NULL(out, + platform::errors::InvalidArgument( + "The output added to VarHandle %s is NULL.", + this->Node()->Name())); pending_ops_.insert(out); node_->outputs.push_back(node); } @@ -120,7 +122,10 @@ struct VarHandle : public VarHandleBase { bool HasEvent() { return has_event_; } const cudaEvent_t& GetEvent() { - PADDLE_ENFORCE(HasEvent(), "The event is not set."); + PADDLE_ENFORCE_EQ( + HasEvent(), true, + platform::errors::PreconditionNotMet( + "The cuda event is not set, maybe InitCUDA() is not called.")); return event_; } diff --git a/paddle/fluid/framework/details/variable_visitor.cc b/paddle/fluid/framework/details/variable_visitor.cc index 134f759081a..fba0c1bf463 100644 --- a/paddle/fluid/framework/details/variable_visitor.cc +++ b/paddle/fluid/framework/details/variable_visitor.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/variable_visitor.h" + #include "paddle/fluid/framework/selected_rows.h" namespace paddle { namespace framework { @@ -24,7 +25,9 @@ static void VisitVariable(Variable* var, Func* func) { } else if (var->IsType()) { (*func)(var->GetMutable()); } else { - PADDLE_THROW("Not supported type %s", ToTypeName(var->Type())); + PADDLE_THROW(platform::errors::Unimplemented( + "VisitVariable is not supported for type %s.", + ToTypeName(var->Type()))); } } @@ -35,7 +38,8 @@ static void VisitVariable(const Variable& var, Func* func) { } else if (var.IsType()) { (*func)(var.Get()); } else { - PADDLE_THROW("Not supported type %s", ToTypeName(var.Type())); + PADDLE_THROW(platform::errors::Unimplemented( + "VisitVariable is not supported for type %s.", ToTypeName(var.Type()))); } } @@ -50,7 +54,8 @@ struct TensorVisitor { template void operator()() { - PADDLE_THROW("Not Support to get LoDTensor from %s", typeid(T).name()); + PADDLE_THROW(platform::errors::Unimplemented( + "Getting tensor from type %s is not supported.", typeid(T).name())); } }; @@ -78,8 +83,8 @@ struct ShareDimsAndLoDVisitor { template void operator()(const T&) { - PADDLE_ENFORCE("ShareDimsAndLoD is not supported by type %s", - typeid(T).name()); + PADDLE_THROW(platform::errors::Unimplemented( + "ShareDimsAndLoD is not supported for type %s.", typeid(T).name())); } }; @@ -89,42 +94,54 @@ void VariableVisitor::ShareDimsAndLoD(const Variable& src, Variable* trg) { } struct EnforceShapeAndDTypeEQVisitor { - const Variable* trg_; + const Variable* dst_; void operator()(const LoDTensor& src) { - auto& tensor = trg_->Get(); - PADDLE_ENFORCE_EQ( - src.place().which(), tensor.place().which(), - "The Places of the two Variable must be all on CPU or all on GPU."); + auto& tensor = dst_->Get(); + PADDLE_ENFORCE_EQ(src.place().which(), tensor.place().which(), + platform::errors::PreconditionNotMet( + "The place type of the two variables is not equal.")); PADDLE_ENFORCE_EQ(src.type(), 
tensor.type(), - "The dtype of the two Variable is not equal."); - PADDLE_ENFORCE_EQ(src.dims(), tensor.dims(), - "The dims of the two Variable is not equal."); + platform::errors::PreconditionNotMet( + "The dtype of the two variables is not equal.")); + PADDLE_ENFORCE_EQ( + src.dims(), tensor.dims(), + platform::errors::PreconditionNotMet( + "The dims of the two variables is not equal.")); PADDLE_ENFORCE_EQ(src.lod(), tensor.lod(), - "The lod of the two Variable is not equal."); - PADDLE_ENFORCE_EQ(src.layout(), tensor.layout(), - "The layout of the two Variable's tensor is not equal."); + platform::errors::PreconditionNotMet( + "The lod of the two variables is not equal.")); + PADDLE_ENFORCE_EQ( + src.layout(), tensor.layout(), + platform::errors::PreconditionNotMet( + "The layout of the two variables' tensors is not equal.")); } void operator()(const SelectedRows& src) { - auto& selected_rows = trg_->Get(); - PADDLE_ENFORCE_EQ( - src.place().which(), selected_rows.place().which(), - "The Places of the two Variable must be all on CPU or all on GPU."); + auto& selected_rows = dst_->Get(); + PADDLE_ENFORCE_EQ(src.place().which(), selected_rows.place().which(), + platform::errors::PreconditionNotMet( + "The place type of the two variables is not equal.")); PADDLE_ENFORCE_EQ(src.value().type(), selected_rows.value().type(), - "The dtype of the two Variable is not equal."); - PADDLE_ENFORCE_EQ(src.value().layout(), selected_rows.value().layout(), - "The layout of the two Variable's tensor is not equal."); + platform::errors::PreconditionNotMet( + "The dtype of the two variables is not equal.")); + PADDLE_ENFORCE_EQ( + src.value().layout(), selected_rows.value().layout(), + platform::errors::PreconditionNotMet( + "The layout of the two variables' tensors is not equal.")); PADDLE_ENFORCE_EQ(src.height(), selected_rows.height(), - "The height of the two Variable is not equal."); + platform::errors::PreconditionNotMet( + "The height of the two variables
is not equal.")); PADDLE_ENFORCE_EQ(src.GetCompleteDims(), selected_rows.GetCompleteDims(), - "The dims of the two Variable is not equal."); + platform::errors::PreconditionNotMet( + "The dims of the two variables is not equal.")); } template void operator()(const T&) { - PADDLE_ENFORCE("EnforceShapeAndDTypeEQ is not supported by type %s", - typeid(T).name()); + PADDLE_THROW(platform::errors::Unimplemented( + "EnforceShapeAndDTypeEQ is not supported for type %s.", + typeid(T).name())); } }; -- GitLab From aa7835efeeb94a75d54c5e569a0f90fe06513a51 Mon Sep 17 00:00:00 2001 From: guofei <52460041+gfwm2013@users.noreply.github.com> Date: Mon, 21 Sep 2020 13:32:02 +0800 Subject: [PATCH 014/117] Correct the error in decorator.py (#27409) test=develop --- python/paddle/reader/decorator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/reader/decorator.py b/python/paddle/reader/decorator.py index aadfb3f49ed..91a2a78203c 100644 --- a/python/paddle/reader/decorator.py +++ b/python/paddle/reader/decorator.py @@ -42,7 +42,7 @@ import paddle.compat as cpt # For more details, please refer to # https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods # https://bugs.python.org/issue33725 -if sys.version_info >= (3, 8): +if sys.version_info >= (3, 8) and sys.platform == 'darwin': fork_context = multiprocessing.get_context('fork') else: fork_context = multiprocessing -- GitLab From 02606d45efa33b3b1d5932b5cbeb6d02844e0c1e Mon Sep 17 00:00:00 2001 From: huangxu96 <46740794+huangxu96@users.noreply.github.com> Date: Mon, 21 Sep 2020 13:35:37 +0800 Subject: [PATCH 015/117] Quant op dev (#25932) * Finished ChannelWiseQuantDequantAbsMaxOp and Passed unittests. * Finished channel-wise quantize strategy in imperative quantization. * Added Cuda code of ChannelWiseQuantDequantMaxAbsOP Add Cuda code of ChannelWiseQuantDequantMaxAbsOp * Add quant_axis for channel_wise quant. 
* fixed a bug in unit tests, which will not trigger axis = 1 case and cannot meet the coverage rate requirement. * Added some assert information and fixed some coding style mistakes. --- paddle/fluid/operators/fake_quantize_op.cc | 135 ++++++ paddle/fluid/operators/fake_quantize_op.cu | 89 +++- paddle/fluid/operators/fake_quantize_op.h | 31 ++ paddle/fluid/operators/fused/fusion_gru_op.cc | 1 + paddle/fluid/pybind/op_function_generator.cc | 1 + .../slim/quantization/imperative/qat.py | 11 +- .../slim/quantization/imperative/quant_nn.py | 112 ++++- .../contrib/slim/tests/test_imperative_qat.py | 1 - .../tests/test_imperative_qat_channelwise.py | 428 ++++++++++++++++++ .../tests/unittests/test_fake_quantize_op.py | 65 +++ 10 files changed, 861 insertions(+), 13 deletions(-) create mode 100644 python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py diff --git a/paddle/fluid/operators/fake_quantize_op.cc b/paddle/fluid/operators/fake_quantize_op.cc index 04ac4a35208..e9b4c7dacf8 100644 --- a/paddle/fluid/operators/fake_quantize_op.cc +++ b/paddle/fluid/operators/fake_quantize_op.cc @@ -174,7 +174,64 @@ struct ChannelClipAndFakeQuantFunctor { template struct ChannelClipAndFakeQuantFunctor; +template +struct ChannelClipFakeQuantDequantFunctor { + void operator()(const platform::CPUDeviceContext& ctx, + const framework::Tensor& in, const framework::Tensor& scale, + const int bin_cnt, const int quant_axis, + framework::Tensor* out) { + PADDLE_ENFORCE_EQ( + quant_axis == 0 || quant_axis == 1, true, + platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); + auto* scale_data = scale.data(); + auto* in_data = in.data(); + auto* out_data = out->mutable_data(ctx.GetPlace()); + auto in_dims = in.dims(); + const int64_t channel = in_dims[quant_axis]; + platform::Transform trans; + if (quant_axis == 0) { + const int64_t channel_size = in.numel() / channel; + for (int i = 0; i < channel; i++) { + T s =
scale_data[i]; + auto* start = in_data + i * channel_size; + auto* end = in_data + (i + 1) * channel_size; + trans(ctx, start, end, out_data + i * channel_size, + ClipFunctor(-s, s)); + } + for (int i = 0; i < channel; i++) { + T s = scale_data[i]; + T inv_s = inverse(s); + framework::Tensor one_channel_out = out->Slice(i, i + 1); + auto out_e = framework::EigenVector::Flatten(one_channel_out); + out_e.device(*ctx.eigen_device()) = + (bin_cnt * inv_s * out_e).round() * s / static_cast(bin_cnt); + } + } else if (quant_axis == 1) { + const int64_t step_i = in.numel() / in_dims[0]; + const int64_t step_j = in.numel() / (in_dims[0] * in_dims[1]); + for (int i = 0; i < in_dims[0]; i++) { + for (int j = 0; j < in_dims[1]; j++) { + T s = scale_data[j]; + T inv_s = inverse(s); + auto* start = in_data + i * step_i + j * step_j; + auto* end = in_data + i * step_i + (j + 1) * step_j; + auto* cur_out_data = out_data + i * step_i + j * step_j; + trans(ctx, start, end, cur_out_data, ClipFunctor(-s, s)); + for (int k = 0; k < step_j; k++) { + cur_out_data[k] = std::round(bin_cnt * inv_s * cur_out_data[k]) * + s / static_cast(bin_cnt); + } + } + } + } + } +}; + +template struct ChannelClipFakeQuantDequantFunctor; template struct FindRangeAbsMaxFunctor { void operator()(const platform::CPUDeviceContext& ctx, @@ -360,6 +417,75 @@ $$0 \leq c \lt \ the\ channel\ number\ of\ X$$ } }; +class FakeChannelWiseQuantizeDequantizeAbsMaxOp + : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", + "FakeChannelWiseQuantizeDequantizeAbsMax"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", + "FakeChannelWiseQuantizeDequantizeAbsMax"); + OP_INOUT_CHECK(ctx->HasOutput("OutScale"), "Output", "OutScale", + "FakeChannelWiseQuantizeDequantizeAbsMax"); + int quant_axis = ctx->Attrs().Get("quant_axis"); + 
ctx->SetOutputDim("Out", ctx->GetInputDim("X")); + ctx->SetOutputDim("OutScale", {ctx->GetInputDim("X")[quant_axis]}); + ctx->ShareLoD("X", /*->*/ "Out"); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); + } +}; + +class FakeChannelWiseQuantizeDequantizeAbsMaxOpMaker + : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", "(Tensor) Input is float data type."); + AddOutput("Out", + "(Tensor) Output of quantized and dequantized low level tensor, " + "saved as float data type."); + AddOutput("OutScale", "(Tensor) Current channel wise scale"); + AddAttr("quant_axis", + "(int, default 0) The axis for quantization. " + "For conv2d, depthwise_conv2d, conv2d_transpose " + "and mul, the quant_axis is equal to the cout axis.") + .SetDefault(0) + .AddCustomChecker([](const int& quant_axis) { + PADDLE_ENFORCE_EQ(quant_axis == 0 || quant_axis == 1, true, + platform::errors::InvalidArgument( + "'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); + }); + AddAttr("bit_length", "(int, default 8)") + .SetDefault(8) + .AddCustomChecker([](const int& bit_length) { + PADDLE_ENFORCE_EQ(bit_length >= 1 && bit_length <= 16, true, + platform::errors::InvalidArgument( + "'bit_length' should be between 1 and 16, but " + "the received is %d", + bit_length)); + }); + AddComment(R"DOC( +The scale of FakeChannelWiseQuantize operator is a vector. +In detail, each channel of the input X has a scale value. 
+ +$$scale_c = max(abs(X_c))$$ +$$range = 2^{bit\_length - 1} - 1$$ +$$Out_c = round(\frac{X_c * range} {scale_c}) * \frac{scale_c} {range}$$ +In above three formulas, the range value of c is as follow: +$$0 \leq c \lt \ the\ channel\ number\ of\ X$$ +)DOC"); + } +}; + class FakeQuantizeRangeAbsMaxOp : public framework::OperatorWithKernel { public: FakeQuantizeRangeAbsMaxOp(const std::string& type, @@ -666,3 +792,12 @@ REGISTER_OP_CPU_KERNEL(moving_average_abs_max_scale, REGISTER_OPERATOR(fake_quantize_dequantize_grad, ops::FakeQuantDequantGradOp); REGISTER_OP_CPU_KERNEL(fake_quantize_dequantize_grad, ops::FakeQuantDequantGradKernel); + +REGISTER_OPERATOR(fake_channel_wise_quantize_dequantize_abs_max, + ops::FakeChannelWiseQuantizeDequantizeAbsMaxOp, + ops::FakeChannelWiseQuantizeDequantizeAbsMaxOpMaker, + ops::FakeQuantDequantGradMaker, + ops::FakeQuantDequantGradMaker); +REGISTER_OP_CPU_KERNEL( + fake_channel_wise_quantize_dequantize_abs_max, + ops::FakeChannelWiseQuantizeDequantizeAbsMaxKernel); diff --git a/paddle/fluid/operators/fake_quantize_op.cu b/paddle/fluid/operators/fake_quantize_op.cu index 6ff3c7ec632..8bc14dde863 100644 --- a/paddle/fluid/operators/fake_quantize_op.cu +++ b/paddle/fluid/operators/fake_quantize_op.cu @@ -417,8 +417,90 @@ struct FindMovingAverageAbsMaxFunctor { } }; -template struct FindMovingAverageAbsMaxFunctor; +// ChannelClipAndQuantDequantKernel for quant_axis is 0 +template +__global__ void ChannelClipAndQuantDequantKernelQuantAxis0( + const T* in, const T* scale, const int bin_cnt, const int n, const int c, + T* out) { + int tid = threadIdx.x; + + int channel_size = n / c; + const T* in_c = in + blockIdx.x * channel_size; + T* out_c = out + blockIdx.x * channel_size; + + T s = scale[blockIdx.x]; + T inv_s = inverse(s); + + for (int i = tid; i < channel_size; i += blockDim.x) { + T x = in_c[i]; + T v = x > s ? s : x; + v = v < -s ? 
-s : v; + v = bin_cnt * inv_s * v; + out_c[i] = round(v) * s / bin_cnt; + } +} + +// ChannelClipAndQuantDequantKernel for quant_axis is 1 +template +__global__ void ChannelClipAndQuantDequantKernelQuantAxis1( + const T* in, const T* scale, const int bin_cnt, const int n, const int cin, + const int cout, T* out) { + T s = scale[blockIdx.x % cout]; + T inv_s = inverse(s); + + int wh_size = n / (cin * cout); + const T* in_c = in + blockIdx.x * wh_size; + T* out_c = out + blockIdx.x * wh_size; + + for (int i = threadIdx.x; i < wh_size; i += blockDim.x) { + T x = in_c[i]; + T v = x > s ? s : x; + v = v < -s ? -s : v; + v = bin_cnt * inv_s * v; + out_c[i] = round(v) * s / bin_cnt; + } +} + +template +struct ChannelClipFakeQuantDequantFunctor { + void operator()(const platform::CUDADeviceContext& ctx, + const framework::Tensor& in, const framework::Tensor& scale, + const int bin_cnt, const int quant_axis, + framework::Tensor* out) { + // At present, channelwise quantization supports conv2d, depthwise_conv2d + // conv2d_transpose and mul + PADDLE_ENFORCE_EQ( + quant_axis == 0 || quant_axis == 1, true, + platform::errors::InvalidArgument("'quant_axis' should be 0 or 1, but " + "the received is %d", + quant_axis)); + + int num = in.numel(); + auto in_dims = in.dims(); + + const T* in_data = in.data(); + const T* scale_data = scale.data(); + T* out_data = out->mutable_data(ctx.GetPlace()); + + if (quant_axis == 0) { + int grid = in_dims[0]; + int block = 1024; + ChannelClipAndQuantDequantKernelQuantAxis0< + T><<>>(in_data, scale_data, bin_cnt, + num, in_dims[0], out_data); + } else if (quant_axis == 1) { + int grid = in_dims[0] * in_dims[1]; + int block = 1024; + + ChannelClipAndQuantDequantKernelQuantAxis1< + T><<>>( + in_data, scale_data, bin_cnt, num, in_dims[0], in_dims[1], out_data); + } + } +}; + +template struct ChannelClipFakeQuantDequantFunctor; } // namespace operators } // namespace paddle @@ -443,3 +525,6 @@ REGISTER_OP_CUDA_KERNEL( 
ops::FakeQuantizeDequantizeMovingAverageAbsMaxKernel); REGISTER_OP_CUDA_KERNEL(fake_quantize_dequantize_grad, ops::FakeQuantDequantGradKernel); +REGISTER_OP_CUDA_KERNEL( + fake_channel_wise_quantize_dequantize_abs_max, + ops::FakeChannelWiseQuantizeDequantizeAbsMaxKernel); diff --git a/paddle/fluid/operators/fake_quantize_op.h b/paddle/fluid/operators/fake_quantize_op.h index 5c6e0b1f6e2..2f5afbe0eed 100644 --- a/paddle/fluid/operators/fake_quantize_op.h +++ b/paddle/fluid/operators/fake_quantize_op.h @@ -72,6 +72,13 @@ struct ChannelClipAndFakeQuantFunctor { const int quant_axis, framework::Tensor* out); }; +template +struct ChannelClipFakeQuantDequantFunctor { + void operator()(const DeviceContext& ctx, const framework::Tensor& in, + const framework::Tensor& scale, const int bin_cnt, + const int quant_axis, framework::Tensor* out); +}; + template struct FindMovingAverageAbsMaxFunctor { void operator()(const DeviceContext& ctx, const framework::Tensor& in_accum, @@ -154,6 +161,30 @@ class FakeChannelWiseQuantizeAbsMaxKernel : public framework::OpKernel { } }; +template +class FakeChannelWiseQuantizeDequantizeAbsMaxKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* in = context.Input("X"); + auto* out = context.Output("Out"); + auto* out_scale = context.Output("OutScale"); + T* out_scale_data = out_scale->mutable_data(context.GetPlace()); + auto& dev_ctx = context.template device_context(); + out->mutable_data(dev_ctx.GetPlace()); + + int bit_length = context.Attr("bit_length"); + int bin_cnt = std::pow(2, bit_length - 1) - 1; + int quant_axis = context.Attr("quant_axis"); + + FindChannelAbsMaxFunctor()(dev_ctx, *in, quant_axis, + out_scale_data); + + ChannelClipFakeQuantDequantFunctor()( + dev_ctx, *in, *out_scale, bin_cnt, quant_axis, out); + } +}; + template class FakeQuantizeRangeAbsMaxKernel : public framework::OpKernel { public: diff --git 
a/paddle/fluid/operators/fused/fusion_gru_op.cc b/paddle/fluid/operators/fused/fusion_gru_op.cc index 40139066096..e3776a80b31 100644 --- a/paddle/fluid/operators/fused/fusion_gru_op.cc +++ b/paddle/fluid/operators/fused/fusion_gru_op.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/operators/fused/fusion_gru_op.h" #include // for memcpy #include +#include #include "paddle/fluid/operators/jit/kernels.h" #include "paddle/fluid/operators/math/blas.h" #include "paddle/fluid/operators/math/fc.h" diff --git a/paddle/fluid/pybind/op_function_generator.cc b/paddle/fluid/pybind/op_function_generator.cc index f751136640c..d3052ebd351 100644 --- a/paddle/fluid/pybind/op_function_generator.cc +++ b/paddle/fluid/pybind/op_function_generator.cc @@ -111,6 +111,7 @@ std::map> op_passing_outs_map = { {"fake_quantize_dequantize_moving_average_abs_max", {"Out", "OutScale", "OutAccum", "OutState"}}, {"fake_quantize_dequantize_abs_max", {"Out", "OutScale"}}, + {"fake_channel_wise_quantize_dequantize_abs_max", {"Out", "OutScale"}}, {"check_finite_and_unscale", {"Out", "FoundInfinite"}}, {"update_loss_scaling", {"Out", "LossScaling", "OutGoodSteps", "OutBadSteps"}}, diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index 7b276293638..8d7ebcf4caa 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -99,7 +99,12 @@ class ImperativeQuantAware(object): self._activation_bits = activation_bits self._moving_rate = moving_rate - quant_type = {'abs_max', 'moving_average_abs_max'} + quant_type = { + 'abs_max', 'moving_average_abs_max', 'channel_wise_abs_max' + } + + assert activation_quantize_type != 'channel_wise_abs_max', \ + "The activation quantization type does not support 'channel_wise_abs_max'." 
if activation_quantize_type not in quant_type: raise ValueError( "Unknown activation_quantize_type : '%s'. It can only be " @@ -108,8 +113,8 @@ class ImperativeQuantAware(object): if weight_quantize_type not in quant_type: raise ValueError( "Unknown weight_quantize_type: '%s'. It can only be " - "'abs_max' or 'moving_average_abs_max' now." % - (str(weight_quantize_type))) + "'abs_max' or 'moving_average_abs_max' or 'channel_wise_abs_max' now." + % (str(weight_quantize_type))) self._activation_quantize_type = activation_quantize_type self._weight_quantize_type = weight_quantize_type diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py index e22c980b0a7..2e35ac288c7 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py @@ -24,7 +24,7 @@ from paddle.fluid.data_feeder import check_variable_and_dtype __all__ = [ 'FakeQuantMovingAverage', 'FakeQuantAbsMax', 'QuantizedConv2D', - 'QuantizedLinear' + 'QuantizedLinear', 'FakeChannelWiseQuantDequantAbsMax' ] @@ -209,6 +209,89 @@ class FakeQuantAbsMax(layers.Layer): return quant_out +class FakeChannelWiseQuantDequantAbsMax(layers.Layer): + def __init__(self, + name=None, + channel_num=None, + quant_bits=8, + quant_axis=0, + dtype='float32', + quant_on_weight=False): + assert quant_on_weight == True, "Channel_wise only can be used on weight quantization." 
+ super(FakeChannelWiseQuantDequantAbsMax, self).__init__() + self._quant_bits = quant_bits + self._quant_axis = quant_axis + self._dtype = dtype + self._name = name + self._channel_num = channel_num + scale_prefix = "{}.scale".format( + name) if name else 'quant_dequant.scale' + self._scale_name = unique_name.generate(scale_prefix) + if quant_on_weight: + scale_attr = ParamAttr( + name=self._scale_name, + initializer=Constant(0.0), + trainable=False) + self._scale = self.create_parameter( + shape=[self._channel_num], attr=scale_attr, dtype=self._dtype) + self._scale.stop_gradient = True + else: + self._scale = None + + def forward(self, input): + if in_dygraph_mode(): + attrs = ('bit_length', self._quant_bits, 'quant_axis', + self._quant_axis) + quant_out = _varbase_creator( + type=input.type, + name="{}.quantized.dequantized".format(input.name), + shape=input.shape, + dtype=input.dtype, + persistable=False) + + out_scale = self._scale + if out_scale is None: + out_scale = _varbase_creator( + type=core.VarDesc.VarType.LOD_TENSOR, + name=self._scale_name, + shape=[self._channel_num], + dtype=self._dtype, + persistable=False) + out_scale.stop_gradient = True + + out, _, = core.ops.fake_channel_wise_quantize_dequantize_abs_max( + input, quant_out, out_scale, *attrs) + return out + + check_variable_and_dtype(input, 'input', ['float32'], + "FakeChannelWiseQuantDequantAbsMax") + attrs = {'bit_length': self._quant_bits, 'quant_axis': self._quant_axis} + inputs = {"X": [input]} + quant_out = self._helper.create_variable( + name="{}.quantized.dequantized".format(input.name), + dtype=input.dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + stop_gradient=False) + out_scale = self._scale + if not out_scale: + out_scale = self._helper.create_variable( + name=self._scale_name, + dtype=self._dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + stop_gradient=True) + outputs = {"Out": [quant_out], "OutScale": [out_scale]} + + 
self._helper.append_op( + type="fake_channel_wise_quantize_dequantize_abs_max", + inputs=inputs, + outputs=outputs, + attrs=attrs) + + return quant_out + + def _get_fake_quant_type(quant_type, **kwargs): call_args = { "name": kwargs.get("name", None), @@ -220,10 +303,17 @@ def _get_fake_quant_type(quant_type, **kwargs): call_args["quant_on_weight"] = kwargs.get("quant_on_weight", False) elif quant_type == 'moving_average_abs_max': call_args["moving_rate"] = kwargs.get("moving_rate", 0.9) - + elif quant_type == 'channel_wise_abs_max': + call_args["quant_on_weight"] = kwargs.get("quant_on_weight", False) + call_args["channel_num"] = kwargs.get("channel_num", None) + call_args["quant_axis"] = kwargs.get("quant_axis", 0) + assert call_args["channel_num"] is not None, ( + "You need to input channel_num" + "when you use channel_wise_abs_max strategy.") fake_quant_map = { 'abs_max': FakeQuantAbsMax, - 'moving_average_abs_max': FakeQuantMovingAverage + 'moving_average_abs_max': FakeQuantMovingAverage, + 'channel_wise_abs_max': FakeChannelWiseQuantDequantAbsMax } return fake_quant_map[quant_type](**call_args) @@ -255,19 +345,23 @@ class QuantizedConv2D(layers.Layer): self.weight = getattr(layer, 'weight') self.bias = getattr(layer, 'bias') # For FakeQuant + self._conv2d_quant_axis = 0 self._fake_quant_weight = _get_fake_quant_type( weight_quantize_type, name=self.weight.name, moving_rate=moving_rate, quant_bits=weight_bits, dtype=self._dtype, - quant_on_weight=True) + quant_on_weight=True, + channel_num=self.weight.shape[self._conv2d_quant_axis], + quant_axis=self._conv2d_quant_axis) self._fake_quant_input = _get_fake_quant_type( activation_quantize_type, name=layer.full_name(), moving_rate=moving_rate, quant_bits=activation_bits, - dtype=self._dtype) + dtype=self._dtype, + quant_on_weight=False) def forward(self, input): quant_input = self._fake_quant_input(input) @@ -341,19 +435,23 @@ class QuantizedLinear(layers.Layer): self.weight = getattr(layer, 'weight') self.bias = 
getattr(layer, 'bias') # For FakeQuant + self._linear_quant_axis = 1 self._fake_quant_weight = _get_fake_quant_type( weight_quantize_type, name=self.weight.name, moving_rate=moving_rate, quant_bits=weight_bits, dtype=self._dtype, - quant_on_weight=True) + quant_on_weight=True, + channel_num=self.weight.shape[self._linear_quant_axis], + quant_axis=self._linear_quant_axis) self._fake_quant_input = _get_fake_quant_type( activation_quantize_type, name=layer.full_name(), moving_rate=moving_rate, quant_bits=activation_bits, - dtype=self._dtype) + dtype=self._dtype, + quant_on_weight=False) def forward(self, input): quant_input = self._fake_quant_input(input) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py index f076d274b64..0d047a0cd3b 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py @@ -181,7 +181,6 @@ class TestImperativeQat(unittest.TestCase): img = fluid.dygraph.to_variable(x_data) label = fluid.dygraph.to_variable(y_data) - out = lenet(img) acc = fluid.layers.accuracy(out, label) loss = fluid.layers.cross_entropy(out, label) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py new file mode 100644 index 00000000000..17c613281a8 --- /dev/null +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py @@ -0,0 +1,428 @@ +# copyright (c) 2018 paddlepaddle authors. all rights reserved. +# +# licensed under the apache license, version 2.0 (the "license"); +# you may not use this file except in compliance with the license. 
+# you may obtain a copy of the license at +# +# http://www.apache.org/licenses/license-2.0 +# +# unless required by applicable law or agreed to in writing, software +# distributed under the license is distributed on an "as is" basis, +# without warranties or conditions of any kind, either express or implied. +# see the license for the specific language governing permissions and +# limitations under the license. + +from __future__ import print_function + +import os +import numpy as np +import random +import unittest +import logging +import paddle +import paddle.fluid as fluid +from paddle.fluid import core +from paddle.fluid.optimizer import AdamOptimizer +from paddle.fluid.framework import IrGraph +from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware +from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass +from paddle.fluid.dygraph.container import Sequential +from paddle.fluid.dygraph.nn import Conv2D +from paddle.fluid.dygraph.nn import Pool2D +from paddle.fluid.dygraph.nn import Linear +from paddle.fluid.log_helper import get_logger + +os.environ["CPU_NUM"] = "1" +if core.is_compiled_with_cuda(): + fluid.set_flags({"FLAGS_cudnn_deterministic": True}) + +_logger = get_logger( + __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') + + +def StaticLenet(data, num_classes=10, classifier_activation='softmax'): + conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") + conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") + fc_w1_attr = fluid.ParamAttr(name="fc_w_1") + fc_w2_attr = fluid.ParamAttr(name="fc_w_2") + fc_w3_attr = fluid.ParamAttr(name="fc_w_3") + conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") + conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") + fc_b1_attr = fluid.ParamAttr(name="fc_b_1") + fc_b2_attr = fluid.ParamAttr(name="fc_b_2") + fc_b3_attr = fluid.ParamAttr(name="fc_b_3") + conv1 = fluid.layers.conv2d( + data, + num_filters=6, + filter_size=3, + stride=1, + padding=1, + 
param_attr=conv2d_w1_attr, + bias_attr=conv2d_b1_attr) + pool1 = fluid.layers.pool2d( + conv1, pool_size=2, pool_type='max', pool_stride=2) + conv2 = fluid.layers.conv2d( + pool1, + num_filters=16, + filter_size=5, + stride=1, + padding=0, + param_attr=conv2d_w2_attr, + bias_attr=conv2d_b2_attr) + pool2 = fluid.layers.pool2d( + conv2, pool_size=2, pool_type='max', pool_stride=2) + + fc1 = fluid.layers.fc(input=pool2, + size=120, + param_attr=fc_w1_attr, + bias_attr=fc_b1_attr) + fc2 = fluid.layers.fc(input=fc1, + size=84, + param_attr=fc_w2_attr, + bias_attr=fc_b2_attr) + fc3 = fluid.layers.fc(input=fc2, + size=num_classes, + act=classifier_activation, + param_attr=fc_w3_attr, + bias_attr=fc_b3_attr) + + return fc3 + + +class ImperativeLenet(fluid.dygraph.Layer): + def __init__(self, num_classes=10, classifier_activation='softmax'): + super(ImperativeLenet, self).__init__() + conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") + conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") + fc_w1_attr = fluid.ParamAttr(name="fc_w_1") + fc_w2_attr = fluid.ParamAttr(name="fc_w_2") + fc_w3_attr = fluid.ParamAttr(name="fc_w_3") + conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") + conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") + fc_b1_attr = fluid.ParamAttr(name="fc_b_1") + fc_b2_attr = fluid.ParamAttr(name="fc_b_2") + fc_b3_attr = fluid.ParamAttr(name="fc_b_3") + self.features = Sequential( + Conv2D( + num_channels=1, + num_filters=6, + filter_size=3, + stride=1, + padding=1, + param_attr=conv2d_w1_attr, + bias_attr=conv2d_b1_attr), + Pool2D( + pool_size=2, pool_type='max', pool_stride=2), + Conv2D( + num_channels=6, + num_filters=16, + filter_size=5, + stride=1, + padding=0, + param_attr=conv2d_w2_attr, + bias_attr=conv2d_b2_attr), + Pool2D( + pool_size=2, pool_type='max', pool_stride=2)) + + self.fc = Sequential( + Linear( + input_dim=400, + output_dim=120, + param_attr=fc_w1_attr, + bias_attr=fc_b1_attr), + Linear( + input_dim=120, + output_dim=84, + 
param_attr=fc_w2_attr, + bias_attr=fc_b2_attr), + Linear( + input_dim=84, + output_dim=num_classes, + act=classifier_activation, + param_attr=fc_w3_attr, + bias_attr=fc_b3_attr)) + + def forward(self, inputs): + x = self.features(inputs) + + x = fluid.layers.flatten(x, 1) + x = self.fc(x) + return x + + +class TestImperativeQat(unittest.TestCase): + """ + QAT = quantization-aware training + """ + + def test_qat_save(self): + imperative_qat = ImperativeQuantAware( + weight_quantize_type='channel_wise_abs_max', + activation_quantize_type='moving_average_abs_max') + + with fluid.dygraph.guard(): + lenet = ImperativeLenet() + imperative_qat.quantize(lenet) + adam = AdamOptimizer( + learning_rate=0.001, parameter_list=lenet.parameters()) + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=32, drop_last=True) + test_reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=32) + + epoch_num = 1 + for epoch in range(epoch_num): + lenet.train() + for batch_id, data in enumerate(train_reader()): + x_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape(-1, 1) + + img = fluid.dygraph.to_variable(x_data) + label = fluid.dygraph.to_variable(y_data) + out = lenet(img) + acc = fluid.layers.accuracy(out, label) + loss = fluid.layers.cross_entropy(out, label) + avg_loss = fluid.layers.mean(loss) + avg_loss.backward() + adam.minimize(avg_loss) + lenet.clear_gradients() + if batch_id % 100 == 0: + _logger.info( + "Train | At epoch {} step {}: loss = {:}, acc= {:}". 
+ format(epoch, batch_id, + avg_loss.numpy(), acc.numpy())) + + lenet.eval() + for batch_id, data in enumerate(test_reader()): + x_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape(-1, 1) + + img = fluid.dygraph.to_variable(x_data) + label = fluid.dygraph.to_variable(y_data) + + out = lenet(img) + acc_top1 = fluid.layers.accuracy( + input=out, label=label, k=1) + acc_top5 = fluid.layers.accuracy( + input=out, label=label, k=5) + + if batch_id % 100 == 0: + _logger.info( + "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}". + format(epoch, batch_id, + acc_top1.numpy(), acc_top5.numpy())) + + # save weights + model_dict = lenet.state_dict() + fluid.save_dygraph(model_dict, "save_temp") + + # test the correctness of `paddle.jit.save` + data = next(test_reader()) + test_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + test_img = fluid.dygraph.to_variable(test_data) + lenet.eval() + before_save = lenet(test_img) + + # save inference quantized model + path = "./mnist_infer_model" + paddle.jit.save( + layer=lenet, + model_path=path, + input_spec=[ + paddle.static.InputSpec( + shape=[None, 1, 28, 28], dtype='float32') + ]) + + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + exe = fluid.Executor(place) + [inference_program, feed_target_names, fetch_targets] = ( + fluid.io.load_inference_model( + dirname=path, + executor=exe, + model_filename="__model__", + params_filename="__variables__")) + after_save, = exe.run(inference_program, + feed={feed_target_names[0]: test_data}, + fetch_list=fetch_targets) + + self.assertTrue( + np.allclose(after_save, before_save.numpy()), + msg='Failed to save the inference quantized model.') + + def test_qat_acc(self): + def _build_static_lenet(main, startup, is_test=False, seed=1000): + with fluid.unique_name.guard(): + with fluid.program_guard(main, startup): + 
main.random_seed = seed + startup.random_seed = seed + img = fluid.layers.data( + name='image', shape=[1, 28, 28], dtype='float32') + label = fluid.layers.data( + name='label', shape=[1], dtype='int64') + prediction = StaticLenet(img) + if not is_test: + loss = fluid.layers.cross_entropy( + input=prediction, label=label) + avg_loss = fluid.layers.mean(loss) + else: + avg_loss = prediction + return img, label, avg_loss + + reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=32, drop_last=True) + weight_quantize_type = 'channel_wise_abs_max' + activation_quant_type = 'moving_average_abs_max' + param_init_map = {} + seed = 1000 + lr = 0.1 + + # imperative train + _logger.info( + "--------------------------dynamic graph qat--------------------------" + ) + imperative_qat = ImperativeQuantAware( + weight_quantize_type=weight_quantize_type, + activation_quantize_type=activation_quant_type) + + with fluid.dygraph.guard(): + np.random.seed(seed) + fluid.default_main_program().random_seed = seed + fluid.default_startup_program().random_seed = seed + lenet = ImperativeLenet() + fixed_state = {} + for name, param in lenet.named_parameters(): + p_shape = param.numpy().shape + p_value = param.numpy() + if name.endswith("bias"): + value = np.zeros_like(p_value).astype('float32') + else: + value = np.random.normal( + loc=0.0, scale=0.01, size=np.product(p_shape)).reshape( + p_shape).astype('float32') + fixed_state[name] = value + param_init_map[param.name] = value + lenet.set_dict(fixed_state) + + imperative_qat.quantize(lenet) + adam = AdamOptimizer( + learning_rate=lr, parameter_list=lenet.parameters()) + dynamic_loss_rec = [] + lenet.train() + for batch_id, data in enumerate(reader()): + x_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape(-1, 1) + + img = fluid.dygraph.to_variable(x_data) + label = fluid.dygraph.to_variable(y_data) + + out = lenet(img) + loss = 
fluid.layers.cross_entropy(out, label) + avg_loss = fluid.layers.mean(loss) + avg_loss.backward() + adam.minimize(avg_loss) + lenet.clear_gradients() + dynamic_loss_rec.append(avg_loss.numpy()[0]) + if batch_id % 100 == 0: + _logger.info('{}: {}'.format('loss', avg_loss.numpy())) + + paddle.jit.save( + layer=lenet, + model_path="./dynamic_mnist", + input_spec=[ + paddle.static.InputSpec( + shape=[None, 1, 28, 28], dtype='float32') + ]) + + # static graph train + _logger.info( + "--------------------------static graph qat--------------------------" + ) + static_loss_rec = [] + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + exe = fluid.Executor(place) + + main = fluid.Program() + infer = fluid.Program() + startup = fluid.Program() + static_img, static_label, static_loss = _build_static_lenet( + main, startup, False, seed) + infer_img, _, infer_pre = _build_static_lenet(infer, startup, True, + seed) + with fluid.unique_name.guard(): + with fluid.program_guard(main, startup): + opt = AdamOptimizer(learning_rate=lr) + opt.minimize(static_loss) + + scope = core.Scope() + with fluid.scope_guard(scope): + exe.run(startup) + for param in main.all_parameters(): + param_tensor = scope.var(param.name).get_tensor() + param_tensor.set(param_init_map[param.name], place) + + main_graph = IrGraph(core.Graph(main.desc), for_test=False) + infer_graph = IrGraph(core.Graph(infer.desc), for_test=True) + transform_pass = QuantizationTransformPass( + scope=scope, + place=place, + activation_quantize_type=activation_quant_type, + weight_quantize_type=weight_quantize_type, + quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul']) + transform_pass.apply(main_graph) + transform_pass.apply(infer_graph) + build_strategy = fluid.BuildStrategy() + build_strategy.fuse_all_reduce_ops = False + binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel( + loss_name=static_loss.name, build_strategy=build_strategy) + + feeder = 
fluid.DataFeeder( + feed_list=[static_img, static_label], place=place) + with fluid.scope_guard(scope): + for batch_id, data in enumerate(reader()): + loss_v, = exe.run(binary, + feed=feeder.feed(data), + fetch_list=[static_loss]) + static_loss_rec.append(loss_v[0]) + if batch_id % 100 == 0: + _logger.info('{}: {}'.format('loss', loss_v)) + + save_program = infer_graph.to_program() + with fluid.scope_guard(scope): + fluid.io.save_inference_model("./static_mnist", [infer_img.name], + [infer_pre], exe, save_program) + rtol = 1e-05 + atol = 1e-08 + for i, (loss_d, + loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)): + diff = np.abs(loss_d - loss_s) + if diff > (atol + rtol * np.abs(loss_s)): + _logger.info( + "diff({}) at {}, dynamic loss = {}, static loss = {}". + format(diff, i, loss_d, loss_s)) + break + + self.assertTrue( + np.allclose( + np.array(dynamic_loss_rec), + np.array(static_loss_rec), + rtol=rtol, + atol=atol, + equal_nan=True), + msg='Failed to do the imperative qat.') + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py b/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py index 7835fd3f53d..01f0abe0f21 100644 --- a/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py +++ b/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py @@ -306,5 +306,70 @@ class TestFakeQuantDequantAbsOp(OpTest): self.check_grad(["X"], "Out", user_defined_grads=gradient) +class TestChannelWiseFakeQuantDequantOp(OpTest): + def setUp(self): + self.set_arg() + assert self.quant_axis in [0, 1], "quant_axis should be 0 or 1." 
+ + self.op_type = "fake_channel_wise_quantize_dequantize_abs_max" + self.attrs = {'bit_length': 8, 'quant_axis': self.quant_axis} + + scales = [] + outputs = self.inputs['X'].copy() + range_v = (1 << (self.attrs['bit_length'] - 1)) - 1 + if self.quant_axis == 0: + for i in range(self.inputs['X'].shape[0]): + scale_v = np.max(np.abs(self.inputs['X'][i])).astype("float32") + scales.append(scale_v) + outputs[i] = np.round(outputs[i] * range_v / + scale_v) * scale_v / range_v + elif self.quant_axis == 1: + for i in range(self.inputs['X'].shape[1]): + scale_v = np.max(np.abs(self.inputs['X'][:, i])).astype( + "float32") + scales.append(scale_v) + outputs[:, i] = np.round(outputs[:, i] * range_v / + scale_v) * scale_v / range_v + + self.outputs = { + 'Out': outputs, + 'OutScale': np.array(scales).astype("float32"), + } + + def set_arg(self): + self.quant_axis = 0 + self.inputs = { + 'X': np.random.random((3, 4, 64, 64)).astype("float32"), + } + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + x = self.inputs["X"] + gradient = [np.ones(x.shape) / np.product(x.shape)] + self.check_grad(["X"], "Out", user_defined_grads=gradient) + + +class TestChannelWiseFakeQuantDequantOp1(TestChannelWiseFakeQuantDequantOp): + def set_arg(self): + self.quant_axis = 1 + self.inputs = { + 'X': np.random.random((15, 20, 5, 5)).astype("float32"), + } + + +class TestChannelWiseFakeQuantDequantOp2(TestChannelWiseFakeQuantDequantOp): + def set_arg(self): + self.quant_axis = 0 + self.inputs = {'X': np.random.random((30, 15)).astype("float32"), } + + +class TestChannelWiseFakeQuantDequantOp3(TestChannelWiseFakeQuantDequantOp): + def set_arg(self): + self.quant_axis = 1 + self.inputs = {'X': np.random.random((30, 15)).astype("float32"), } + + if __name__ == "__main__": unittest.main() -- GitLab From 39546aa2f32788e1b55394739d46e47cd37fc232 Mon Sep 17 00:00:00 2001 From: Wilber Date: Mon, 21 Sep 2020 13:39:17 +0800 Subject: [PATCH 016/117] Add pass compatible and 
unit test. (#27377) --- .../ir/embedding_fc_lstm_fuse_pass.cc | 12 ++- paddle/fluid/framework/ir/fc_fuse_pass.cc | 8 ++ paddle/fluid/framework/ir/fc_gru_fuse_pass.cc | 22 ++++- .../fluid/framework/ir/fc_lstm_fuse_pass.cc | 15 ++++ .../framework/ir/squared_mat_sub_fuse_pass.cc | 30 +++++-- .../framework/ir/squared_mat_sub_fuse_pass.h | 2 +- .../inference/api/paddle_pass_builder.cc | 3 +- python/paddle/fluid/layers/tensor.py | 2 + .../ir/inference/test_fc_fuse_pass.py | 54 ++++++++++++ .../ir/inference/test_fc_gru_fuse_pass.py | 86 +++++++++++++++++++ .../ir/inference/test_fc_lstm_fuse_pass.py | 52 +++++++++++ .../test_squared_mat_sub_fuse_pass.py | 63 ++++++++++++++ ...test_transpose_flatten_concat_fuse_pass.py | 4 +- 13 files changed, 342 insertions(+), 11 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_fc_fuse_pass.py create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_fc_gru_fuse_pass.py create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_fc_lstm_fuse_pass.py create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_squared_mat_sub_fuse_pass.py diff --git a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc index c50b7476c6a..02e3e2542f6 100644 --- a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc @@ -23,6 +23,8 @@ #include "paddle/fluid/operators/math/cpu_vec.h" #include "paddle/fluid/platform/cpu_info.h" +#include "paddle/fluid/framework/op_version_registry.h" + namespace paddle { namespace framework { namespace ir { @@ -34,7 +36,7 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, // Build pattern PDNode* x = pattern->NewNode(patterns::PDNodeName(name_scope, "x")) - ->assert_is_op_input("lookup_table") + ->assert_is_op_input("lookup_table_v2") ->assert_var_not_persistable(); patterns::Embedding 
embedding_pattern(pattern, name_scope); // TODO(jczaja): Intermediate can only be for val that are not used anywhere @@ -256,3 +258,11 @@ void EmbeddingFCLSTMFusePass::ApplyImpl(ir::Graph* graph) const { REGISTER_PASS(embedding_fc_lstm_fuse_pass, paddle::framework::ir::EmbeddingFCLSTMFusePass); +REGISTER_PASS_CAPABILITY(embedding_fc_lstm_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("lookup_table_v2", 0) + .EQ("mul", 0) + .EQ("elementwise_add", 0) + .EQ("lstm", 0) + .EQ("fused_embedding_fc_lstm", 0)); diff --git a/paddle/fluid/framework/ir/fc_fuse_pass.cc b/paddle/fluid/framework/ir/fc_fuse_pass.cc index 066a8fb9757..d60510a4074 100644 --- a/paddle/fluid/framework/ir/fc_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_fuse_pass.cc @@ -18,6 +18,7 @@ #include #include #include "paddle/fluid/framework/ir/graph_helper.h" +#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" namespace paddle { @@ -182,3 +183,10 @@ int FCFusePass::ApplyFCPattern(Graph* graph, bool with_relu) const { REGISTER_PASS(fc_fuse_pass, paddle::framework::ir::FCFusePass) .RequirePassAttr("use_gpu"); +REGISTER_PASS_CAPABILITY(fc_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("mul", 0) + .EQ("elementwise_add", 0) + .EQ("relu", 0) + .EQ("fc", 0)); diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc index a2185cdc559..f5fea90ac2f 100644 --- a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc @@ -16,6 +16,7 @@ #include #include #include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/op_version_registry.h" namespace paddle { namespace framework { @@ -125,7 +126,6 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, auto* x_n = subgraph.at(x); GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern); 
GET_IR_NODE_FROM_SUBGRAPH(mul, mul, fc_pattern); - GET_IR_NODE_FROM_SUBGRAPH(fc_out, elementwise_add_out, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(Weight, Weight, gru_pattern); GET_IR_NODE_FROM_SUBGRAPH(gru, gru, gru_pattern); GET_IR_NODE_FROM_SUBGRAPH(Bias, Bias, gru_pattern); @@ -136,10 +136,17 @@ static int BuildFusion(Graph* graph, const std::string& name_scope, gru_pattern); GET_IR_NODE_FROM_SUBGRAPH(BatchHidden, BatchHidden, gru_pattern); + // TODO(wilber): Support origin_mode=True. + if (gru->Op()->GetAttrIfExists("origin_mode") == true) { + LOG(INFO) << "fc_gru_fuse_pass not supported when origin_mode=True."; + return; + } + if (with_fc_bias) { GET_IR_NODE_FROM_SUBGRAPH(mul_out, mul_out, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(elementwise_add, elementwise_add, fc_pattern); + GET_IR_NODE_FROM_SUBGRAPH(fc_out, elementwise_add_out, fc_pattern); gru_creater(gru, x_n, w, Weight, Bias, Hidden, fc_bias); // Remove unneeded nodes. @@ -188,3 +195,16 @@ void FCGRUFusePass::ApplyImpl(ir::Graph* graph) const { REGISTER_PASS(mul_gru_fuse_pass, paddle::framework::ir::MulGRUFusePass); REGISTER_PASS(fc_gru_fuse_pass, paddle::framework::ir::FCGRUFusePass); +REGISTER_PASS_CAPABILITY(mul_gru_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("mul", 0) + .EQ("gru", 0) + .EQ("fusion_gru", 0)); +REGISTER_PASS_CAPABILITY(fc_gru_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("mul", 0) + .EQ("elementwise_add", 0) + .EQ("gru", 0) + .EQ("fusion_gru", 0)); diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc index 12c7fc051e2..a3c57e14e1a 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc @@ -16,6 +16,7 @@ #include #include #include "paddle/fluid/framework/lod_tensor.h" +#include 
"paddle/fluid/framework/op_version_registry.h" namespace paddle { namespace framework { @@ -196,3 +197,17 @@ void FCLstmFusePass::ApplyImpl(ir::Graph* graph) const { REGISTER_PASS(mul_lstm_fuse_pass, paddle::framework::ir::MulLstmFusePass); REGISTER_PASS(fc_lstm_fuse_pass, paddle::framework::ir::FCLstmFusePass); + +REGISTER_PASS_CAPABILITY(fc_lstm_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("mul", 0) + .EQ("elementwise_add", 0) + .EQ("lstm", 0) + .EQ("fusion_lstm", 0)); +REGISTER_PASS_CAPABILITY(mul_lstm_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("mul", 0) + .EQ("lstm", 0) + .EQ("fusion_lstm", 0)); diff --git a/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.cc b/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.cc index 035b198bdcc..d74843611cd 100644 --- a/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.cc +++ b/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.cc @@ -17,6 +17,7 @@ #include #include #include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/op_version_registry.h" namespace paddle { namespace framework { @@ -77,7 +78,8 @@ PDNode* BuildSquaredMatSubPattern(PDPattern* pattern, }; auto is_fusion_input_var = [=](Node* x, const std::string& arg_name) { - bool basic = var_is_op_input(x, "matmul", arg_name) && + bool basic = (var_is_op_input(x, "matmul_v2", arg_name) || + var_is_op_input(x, "matmul", arg_name)) && var_is_op_input(x, "square", "X"); if (!basic) { return false; @@ -88,7 +90,8 @@ PDNode* BuildSquaredMatSubPattern(PDPattern* pattern, } auto* squared_x = squared_x_op->outputs[0]; bool next_is_matmul_from_arg = - var_is_op_input(squared_x, "matmul", arg_name) && + (var_is_op_input(squared_x, "matmul_v2", arg_name) || + var_is_op_input(squared_x, "matmul", arg_name)) && squared_x->outputs.size() == 1 && squared_x->outputs[0]->outputs.size() == 1; if (!next_is_matmul_from_arg) { @@ -103,7 +106,8 
@@ PDNode* BuildSquaredMatSubPattern(PDPattern* pattern, auto is_fusion_first_mul_out = [=](Node* x) -> bool { bool input_is_matmul_op = x && x->inputs.size() == 1 && x->inputs[0]->IsOp() && - x->inputs[0]->Op()->Type() == "matmul"; + (x->inputs[0]->Op()->Type() == "matmul_v2" || + x->inputs[0]->Op()->Type() == "matmul"); if (!input_is_matmul_op) { return false; } @@ -167,7 +171,8 @@ PDNode* BuildSquaredMatSubPattern(PDPattern* pattern, auto* matmul_xy_op = pattern->NewNode( [=](Node* x) { - return x && x->IsOp() && x->Op()->Type() == "matmul" && + return x && x->IsOp() && (x->Op()->Type() == "matmul_v2" || + x->Op()->Type() == "matmul") && is_fusion_first_mul_out(x->outputs[0]); }, name_scope + "/matmul_xy_op"); @@ -189,7 +194,9 @@ PDNode* BuildSquaredMatSubPattern(PDPattern* pattern, auto is_fusion_mat_squared_x_y_op_out = [=](Node* x) -> bool { bool basic = x && x->IsVar() && x->inputs.size() == 1 && - x->inputs[0]->IsOp() && x->inputs[0]->Op()->Type() == "matmul"; + x->inputs[0]->IsOp() && + (x->inputs[0]->Op()->Type() == "matmul_v2" || + x->inputs[0]->Op()->Type() == "matmul"); if (!basic) { return false; } @@ -206,7 +213,8 @@ PDNode* BuildSquaredMatSubPattern(PDPattern* pattern, auto* matmul_squared_x_y_op = pattern->NewNode( [=](Node* x) { - return x && x->IsOp() && x->Op()->Type() == "matmul" && + return x && x->IsOp() && (x->Op()->Type() == "matmul_v2" || + x->Op()->Type() == "matmul") && is_fusion_mat_squared_x_y_op_out(x->outputs[0]); }, name_scope + "/matmul_squared_x_y_op"); @@ -378,3 +386,13 @@ void SquaredMatSubFusePass::ApplyImpl(ir::Graph* graph) const { REGISTER_PASS(squared_mat_sub_fuse_pass, paddle::framework::ir::SquaredMatSubFusePass); +REGISTER_PASS_CAPABILITY(squared_mat_sub_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("matmul", 0) + .EQ("matmul_v2", 0) + .EQ("square", 0) + .EQ("elementwise_mul", 0) + .EQ("elementwise_sub", 0) + .EQ("fill_constant", 0) + .EQ("fusion_squared_mat_sub", 
0)); diff --git a/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.h b/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.h index b6165a512ac..56b7ec9b843 100644 --- a/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.h +++ b/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.h @@ -24,7 +24,7 @@ namespace framework { namespace ir { /** - * Fuse ( (A.^2 * B.^2) - (A * B).^2 ) .* scalar + * Fuse ( (A * B).^2 - (A.^2 * B.^2) ) .* scalar */ class SquaredMatSubFusePass : public FusePassBase { public: diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index c19e77d2714..19f52422b44 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -156,7 +156,8 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) { // "seqpool_concat_fuse_pass", // "seqpool_cvm_concat_fuse_pass", // // "embedding_fc_lstm_fuse_pass", // - "fc_lstm_fuse_pass", // + // TODO(wilber): fix correctness problem. 
+ // "fc_lstm_fuse_pass", // "mul_lstm_fuse_pass", // "fc_gru_fuse_pass", // "mul_gru_fuse_pass", // diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 89acfc6075b..0ce7c098e2d 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -680,8 +680,10 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None): if not isinstance(value, Variable): if dtype in ['int64', 'int32']: attrs['str_value'] = str(int(value)) + attrs['value'] = int(value) else: attrs['str_value'] = str(float(value)) + attrs['value'] = float(value) if in_dygraph_mode(): shape = utils.convert_shape_to_list(shape) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_fc_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_fuse_pass.py new file mode 100644 index 00000000000..a62adcea3f9 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_fuse_pass.py @@ -0,0 +1,54 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import AnalysisConfig +from paddle.fluid.core import PassVersionChecker + + +class FcFusePassTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 128, 768], dtype="float32") + data_y = fluid.data(name="y", shape=[-1, 128, 768], dtype="float32") + fc_out1 = fluid.layers.fc(input=data, + size=3072, + num_flatten_dims=2, + act="relu") + fc_out2 = fluid.layers.fc(input=fc_out1, + size=768, + num_flatten_dims=2) + + self.feeds = {"data": np.random.random((4, 128, 768)).astype("float32")} + self.fetch_list = [fc_out2] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + self.assertTrue(PassVersionChecker.IsCompatible('fc_fuse_pass')) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_fc_gru_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_gru_fuse_pass.py new file mode 100644 index 00000000000..f7b43470d40 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_gru_fuse_pass.py @@ -0,0 +1,86 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import PassVersionChecker + + +class FcGruFusePassTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + dict_dim, emb_dim = 128, 64 + data = fluid.data( + name='step_data', shape=[None], dtype='int64', lod_level=1) + emb = fluid.embedding(input=data, size=[dict_dim, emb_dim]) + hidden_dim = 512 + x = fluid.layers.fc(input=emb, size=hidden_dim * 3) + hidden = fluid.layers.dynamic_gru( + input=x, + size=hidden_dim, + bias_attr=True, + origin_mode=False, + is_reverse=True) + + batch = 16 + lod_tensor = fluid.LoDTensor() + lod_tensor.set(np.random.randint( + 0, dict_dim, size=[batch]).astype("int64"), + fluid.CPUPlace()) + lod_tensor.set_lod([[0, batch]]) + self.feeds = {"step_data": lod_tensor} + self.fetch_list = [hidden] + + def test_check_output(self): + use_gpu = False + self.check_output_with_option(use_gpu) + self.assertTrue(PassVersionChecker.IsCompatible('fc_gru_fuse_pass')) + + +class MulGruFusePassTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + dict_dim, emb_dim = 128, 64 + data = fluid.data( + name='step_data', shape=[None], dtype='int64', lod_level=1) + emb = fluid.embedding(input=data, size=[dict_dim, emb_dim]) + hidden_dim = 512 + x = fluid.layers.fc(input=emb, size=hidden_dim * 3, bias_attr=False) + hidden = fluid.layers.dynamic_gru( + input=x, + size=hidden_dim, + bias_attr=True, + origin_mode=False, + is_reverse=True) + + batch = 16 + lod_tensor = fluid.LoDTensor() + lod_tensor.set(np.random.randint( + 0, dict_dim, size=[batch]).astype("int64"), + fluid.CPUPlace()) + lod_tensor.set_lod([[0, batch]]) + self.feeds = {"step_data": lod_tensor} + 
self.fetch_list = [hidden] + + def test_check_output(self): + use_gpu = False + self.check_output_with_option(use_gpu) + self.assertTrue(PassVersionChecker.IsCompatible('mul_gru_fuse_pass')) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_fc_lstm_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_lstm_fuse_pass.py new file mode 100644 index 00000000000..fbb4373dae2 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_fc_lstm_fuse_pass.py @@ -0,0 +1,52 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import PassVersionChecker + + +class MulLstmFusePassTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + dict_dim, emb_dim = 128, 64 + hidden_dim = 512 + + data = fluid.data( + name='data', shape=[1], dtype='int64', lod_level=1) + emb = fluid.embedding(input=data, size=[dict_dim, emb_dim]) + x = fluid.layers.fc(input=emb, size=hidden_dim * 4, bias_attr=False) + forward, cell = fluid.layers.dynamic_lstm( + input=x, size=hidden_dim * 4) + + batch = 16 + lod_tensor = fluid.LoDTensor() + lod_tensor.set(np.random.randint( + 0, dict_dim, size=[batch]).astype("int64"), + fluid.CPUPlace()) + lod_tensor.set_lod([[0, batch]]) + self.feeds = {"data": lod_tensor} + self.fetch_list = [forward, cell] + + def test_check_output(self): + use_gpu = False + self.check_output_with_option(use_gpu) + self.assertTrue(PassVersionChecker.IsCompatible('mul_lstm_fuse_pass')) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_squared_mat_sub_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_squared_mat_sub_fuse_pass.py new file mode 100644 index 00000000000..5fa242df4e4 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_squared_mat_sub_fuse_pass.py @@ -0,0 +1,63 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import AnalysisConfig +from paddle.fluid.core import PassVersionChecker + + +class SquaredMatSubFusePassTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data_a = fluid.data(name="data_a", shape=[128, 1], dtype="float32") + data_b = fluid.data(name="data_b", shape=[256, 1], dtype="float32") + + fc_a = fluid.layers.fc(data_a, size=256) + fc_b = fluid.layers.fc(data_b, size=64) + + data_a_square = paddle.square(fc_a) + data_b_square = paddle.square(fc_b) + + matmul_ab = paddle.matmul(fc_a, fc_b) + matmul_ab_square = paddle.square(matmul_ab) + matmul_square_ab = paddle.matmul(data_a_square, data_b_square) + + scale = paddle.fill_constant(shape=[1], value=0.5, dtype='float32') + + sub_val = paddle.elementwise_sub(matmul_ab_square, matmul_square_ab) + squared_mat_sub_out = fluid.layers.elementwise_mul(sub_val, scale) + + self.feeds = { + "data_a": np.random.random((128, 1)).astype("float32"), + "data_b": np.random.random((256, 1)).astype("float32") + } + self.fetch_list = [squared_mat_sub_out] + + def test_check_output(self): + use_gpu = False + self.check_output_with_option(use_gpu) + + self.assertTrue( + PassVersionChecker.IsCompatible('squared_mat_sub_fuse_pass')) + + +if __name__ == "__main__": + unittest.main() diff --git 
a/python/paddle/fluid/tests/unittests/ir/inference/test_transpose_flatten_concat_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_transpose_flatten_concat_fuse_pass.py index 34a52e7aed3..83d4b7091cb 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_transpose_flatten_concat_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_transpose_flatten_concat_fuse_pass.py @@ -75,7 +75,9 @@ class TransposeFlattenConcatFusePassWithAxisTest(InferencePassTest): use_gpu = True self.check_output_with_option(use_gpu) - PassVersionChecker.IsCompatible('transpose_flatten_concat_fuse_pass') + self.assertTrue( + PassVersionChecker.IsCompatible( + 'transpose_flatten_concat_fuse_pass')) if __name__ == "__main__": -- GitLab From c7e5cf16ba4529964a5d24ab8e2554a16070c411 Mon Sep 17 00:00:00 2001 From: zhangchunle Date: Mon, 21 Sep 2020 14:19:57 +0800 Subject: [PATCH 017/117] rm setup.py;test=document_fix (#27421) --- setup.py | 577 ------------------------------------------------------- 1 file changed, 577 deletions(-) delete mode 100644 setup.py diff --git a/setup.py b/setup.py deleted file mode 100644 index af558c2ef0b..00000000000 --- a/setup.py +++ /dev/null @@ -1,577 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import subprocess -import os -import os.path -import errno -import re -import shutil -import sys -import fnmatch -import errno -import platform - -from contextlib import contextmanager -from setuptools import Command -from setuptools import setup, Distribution, Extension -from setuptools.command.install import install as InstallCommandBase - - -class BinaryDistribution(Distribution): - def has_ext_modules(foo): - return True - - -RC = 0 - -ext_name = '.dll' if os.name == 'nt' else ('.dylib' if sys.platform == 'darwin' - else '.so') - - -def git_commit(): - try: - cmd = ['git', 'rev-parse', 'HEAD'] - git_commit = subprocess.Popen( - cmd, stdout=subprocess.PIPE, - cwd="@PADDLE_SOURCE_DIR@").communicate()[0].strip() - except: - git_commit = 'Unknown' - git_commit = git_commit.decode() - return str(git_commit) - - -def _get_version_detail(idx): - assert idx < 3, "vesion info consists of %(major)d.%(minor)d.%(patch)d, \ - so detail index must less than 3" - - if re.match('@TAG_VERSION_REGEX@', '@PADDLE_VERSION@'): - version_details = '@PADDLE_VERSION@'.split('.') - - if len(version_details) >= 3: - return version_details[idx] - - return 0 - - -def get_major(): - return int(_get_version_detail(0)) - - -def get_minor(): - return int(_get_version_detail(1)) - - -def get_patch(): - return str(_get_version_detail(2)) - - -def is_taged(): - try: - cmd = [ - 'git', 'describe', '--exact-match', '--tags', 'HEAD', '2>/dev/null' - ] - git_tag = subprocess.Popen( - cmd, stdout=subprocess.PIPE, - cwd="@PADDLE_SOURCE_DIR@").communicate()[0].strip() - git_tag = git_tag.decode() - except: - return False - - if str(git_tag).replace('v', '') == '@PADDLE_VERSION@': - return True - else: - return False - - -def write_version_py(filename='paddle/version.py'): - cnt = '''# THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY -# -full_version = '%(major)d.%(minor)d.%(patch)s' -major = '%(major)d' -minor = '%(minor)d' -patch = '%(patch)s' -rc = '%(rc)d' -istaged = %(istaged)s -commit = 
'%(commit)s' -with_mkl = '%(with_mkl)s' - -def show(): - if istaged: - print('full_version:', full_version) - print('major:', major) - print('minor:', minor) - print('patch:', patch) - print('rc:', rc) - else: - print('commit:', commit) - -def mkl(): - return with_mkl -''' - commit = git_commit() - with open(filename, 'w') as f: - f.write(cnt % { - 'major': get_major(), - 'minor': get_minor(), - 'patch': get_patch(), - 'rc': RC, - 'version': '${PADDLE_VERSION}', - 'commit': commit, - 'istaged': is_taged(), - 'with_mkl': '@WITH_MKL@' - }) - - -write_version_py(filename='@PADDLE_BINARY_DIR@/python/paddle/version.py') - - -def write_distributed_training_mode_py( - filename='paddle/fluid/incubate/fleet/parameter_server/version.py'): - cnt = '''from __future__ import print_function - -# THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY - -from paddle.fluid.incubate.fleet.base.mode import Mode - -BUILD_MODE=Mode.%(mode)s - -def is_transpiler(): - return Mode.TRANSPILER == BUILD_MODE - -''' - - dirname = os.path.dirname(filename) - - try: - os.makedirs(dirname) - except OSError as e: - if e.errno != errno.EEXIST: - raise - - with open(filename, 'w') as f: - f.write(cnt % - {'mode': 'PSLIB' if '${WITH_PSLIB}' == 'ON' else 'TRANSPILER'}) - - -write_distributed_training_mode_py( - filename='@PADDLE_BINARY_DIR@/python/paddle/fluid/incubate/fleet/parameter_server/version.py' -) - -packages = [ - 'paddle', - 'paddle.libs', - 'paddle.utils', - 'paddle.dataset', - 'paddle.reader', - 'paddle.distributed', - 'paddle.incubate', - 'paddle.incubate.complex', - 'paddle.incubate.complex.tensor', - 'paddle.distributed.fleet', - 'paddle.distributed.fleet.base', - 'paddle.distributed.fleet.meta_optimizers', - 'paddle.distributed.fleet.runtime', - 'paddle.distributed.fleet.dataset', - 'paddle.distributed.fleet.metrics', - 'paddle.distributed.fleet.proto', - 'paddle.distributed.fleet.utils', - 'paddle.framework', - 'paddle.jit', - 'paddle.fluid', - 'paddle.fluid.inference', - 
'paddle.fluid.dygraph', - 'paddle.fluid.dygraph.dygraph_to_static', - 'paddle.fluid.dygraph.amp', - 'paddle.fluid.proto', - 'paddle.fluid.proto.profiler', - 'paddle.fluid.distributed', - 'paddle.fluid.layers', - 'paddle.fluid.dataloader', - 'paddle.fluid.contrib', - 'paddle.fluid.contrib.decoder', - 'paddle.fluid.contrib.quantize', - 'paddle.fluid.contrib.reader', - 'paddle.fluid.contrib.slim', - 'paddle.fluid.contrib.slim.quantization', - 'paddle.fluid.contrib.slim.quantization.imperative', - 'paddle.fluid.contrib.utils', - 'paddle.fluid.contrib.extend_optimizer', - 'paddle.fluid.contrib.mixed_precision', - 'paddle.fluid.contrib.layers', - 'paddle.fluid.transpiler', - 'paddle.fluid.transpiler.details', - 'paddle.fluid.incubate', - 'paddle.fluid.incubate.data_generator', - 'paddle.fluid.incubate.fleet', - 'paddle.fluid.incubate.checkpoint', - 'paddle.fluid.incubate.fleet.base', - 'paddle.fluid.incubate.fleet.parameter_server', - 'paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler', - 'paddle.fluid.incubate.fleet.parameter_server.pslib', - 'paddle.fluid.incubate.fleet.parameter_server.ir', - 'paddle.fluid.incubate.fleet.collective', - 'paddle.fluid.incubate.fleet.utils', - 'paddle.hapi', - 'paddle.vision', - 'paddle.vision.models', - 'paddle.vision.transforms', - 'paddle.vision.datasets', - 'paddle.text', - 'paddle.text.datasets', - 'paddle.incubate', - 'paddle.io', - 'paddle.optimizer', - 'paddle.nn', - 'paddle.nn.functional', - 'paddle.nn.layer', - 'paddle.nn.initializer', - 'paddle.nn.utils', - 'paddle.metric', - 'paddle.static', - 'paddle.static.nn', - 'paddle.tensor', -] - -with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f: - setup_requires = f.read().splitlines() - -# Note(wangzhongpu): -# When compiling paddle under python36, the dependencies belonging to python2.7 will be imported, resulting in errors when installing paddle -if sys.version_info >= (3, 6) and sys.version_info < (3, 7): - setup_requires_tmp = [] - for 
setup_requires_i in setup_requires: - if "<\"3.6\"" in setup_requires_i or "<\"3.5\"" in setup_requires_i or "<=\"3.5\"" in setup_requires_i: - continue - setup_requires_tmp += [setup_requires_i] - setup_requires = setup_requires_tmp -if sys.version_info >= (3, 5) and sys.version_info < (3, 6): - setup_requires_tmp = [] - for setup_requires_i in setup_requires: - if "<\"3.5\"" in setup_requires_i: - continue - setup_requires_tmp += [setup_requires_i] - setup_requires = setup_requires_tmp -if sys.version_info >= (3, 7): - setup_requires_tmp = [] - for setup_requires_i in setup_requires: - if "<\"3.6\"" in setup_requires_i or "<=\"3.6\"" in setup_requires_i or "<\"3.5\"" in setup_requires_i or "<=\"3.5\"" in setup_requires_i or "<\"3.7\"" in setup_requires_i: - continue - setup_requires_tmp += [setup_requires_i] - setup_requires = setup_requires_tmp - -if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']: - setup_requires += ['opencv-python'] - -# the prefix is sys.prefix which should always be usr -paddle_bins = '' - -if not '${WIN32}': - paddle_bins = ['${PADDLE_BINARY_DIR}/paddle/scripts/paddle'] -package_data = { - 'paddle.fluid': - ['${FLUID_CORE_NAME}' + ('.so' if os.name != 'nt' else '.pyd')] -} -if '${HAS_NOAVX_CORE}' == 'ON': - package_data['paddle.fluid'] += [ - 'core_noavx' + ('.so' if os.name != 'nt' else '.pyd') - ] - -package_dir = { - '': '${PADDLE_BINARY_DIR}/python', - # The paddle.fluid.proto will be generated while compiling. - # So that package points to other directory. 
- 'paddle.fluid.proto.profiler': '${PADDLE_BINARY_DIR}/paddle/fluid/platform', - 'paddle.fluid.proto': '${PADDLE_BINARY_DIR}/paddle/fluid/framework', - 'paddle.fluid': '${PADDLE_BINARY_DIR}/python/paddle/fluid', -} - -# put all thirdparty libraries in paddle.libs -libs_path = '${PADDLE_BINARY_DIR}/python/paddle/libs' - -package_data['paddle.libs'] = [] -package_data['paddle.libs'] = [('libwarpctc' - if os.name != 'nt' else 'warpctc') + ext_name] -shutil.copy('${WARPCTC_LIBRARIES}', libs_path) - -if '${WITH_MKL}' == 'ON': - shutil.copy('${MKLML_SHARED_LIB}', libs_path) - shutil.copy('${MKLML_SHARED_IOMP_LIB}', libs_path) - package_data['paddle.libs'] += [ - ('libmklml_intel' if os.name != 'nt' else 'mklml') + ext_name, - ('libiomp5' if os.name != 'nt' else 'libiomp5md') + ext_name - ] -else: - if os.name == 'nt': - # copy the openblas.dll - shutil.copy('${OPENBLAS_SHARED_LIB}', libs_path) - package_data['paddle.libs'] += ['openblas' + ext_name] - -if '${WITH_LITE}' == 'ON': - shutil.copy('${LITE_SHARED_LIB}', libs_path) - package_data['paddle.libs'] += ['libpaddle_full_api_shared' + ext_name] - -if '${WITH_PSLIB}' == 'ON': - shutil.copy('${PSLIB_LIB}', libs_path) - if os.path.exists('${PSLIB_VERSION_PY}'): - shutil.copy( - '${PSLIB_VERSION_PY}', - '${PADDLE_BINARY_DIR}/python/paddle/fluid/incubate/fleet/parameter_server/pslib/' - ) - package_data['paddle.libs'] += ['libps' + ext_name] - -if '${WITH_MKLDNN}' == 'ON': - if '${CMAKE_BUILD_TYPE}' == 'Release' and os.name != 'nt': - # only change rpath in Release mode. - # TODO(typhoonzero): use install_name_tool to patch mkl libs once - # we can support mkl on mac. - # - # change rpath of libdnnl.so.1, add $ORIGIN/ to it. - # The reason is that all thirdparty libraries in the same directory, - # thus, libdnnl.so.1 will find libmklml_intel.so and libiomp5.so. 
- command = "patchelf --set-rpath '$ORIGIN/' ${MKLDNN_SHARED_LIB}" - if os.system(command) != 0: - raise Exception("patch libdnnl.so failed, command: %s" % command) - shutil.copy('${MKLDNN_SHARED_LIB}', libs_path) - if os.name != 'nt': - shutil.copy('${MKLDNN_SHARED_LIB_1}', libs_path) - package_data['paddle.libs'] += ['libmkldnn.so.0', 'libdnnl.so.1'] - else: - package_data['paddle.libs'] += ['mkldnn.dll'] - -if '${WITH_XPU}' == 'ON': - # only change rpath in Release mode, - if '${CMAKE_BUILD_TYPE}' == 'Release': - if os.name != 'nt': - if "@APPLE@" == "1": - command = "install_name_tool -id \"@loader_path/\" ${XPU_API_LIB}" - else: - command = "patchelf --set-rpath '$ORIGIN/' ${XPU_API_LIB}" - if os.system(command) != 0: - raise Exception("patch ${XPU_API_LIB} failed, command: %s" % - command) - shutil.copy('${XPU_API_LIB}', libs_path) - shutil.copy('${XPU_RT_LIB}', libs_path) - shutil.copy('${XPU_SIM_LIB}', libs_path) - package_data['paddle.libs'] += [ - '${XPU_API_LIB_NAME}', '${XPU_RT_LIB_NAME}', '${XPU_SIM_LIB_NAME}' - ] - -# copy libfuild_framework.so to libs -if os.name != 'nt' and sys.platform != 'darwin': - paddle_framework_lib = '${FLUID_FRAMEWORK_SHARED_LIB}' - shutil.copy(paddle_framework_lib, libs_path) - package_data['paddle.libs'] += [ - ('libpaddle_framework' - if os.name != 'nt' else 'paddle_framework') + ext_name - ] - -# remove unused paddle/libs/__init__.py -if os.path.isfile(libs_path + '/__init__.py'): - os.remove(libs_path + '/__init__.py') -package_dir['paddle.libs'] = libs_path - -# change rpath of ${FLUID_CORE_NAME}.ext, add $ORIGIN/../libs/ to it. -# The reason is that libwarpctc.ext, libiomp5.ext etc are in paddle.libs, and -# ${FLUID_CORE_NAME}.ext is in paddle.fluid, thus paddle/fluid/../libs will pointer to above libraries. 
-# This operation will fix https://github.com/PaddlePaddle/Paddle/issues/3213 -if '${CMAKE_BUILD_TYPE}' == 'Release': - if os.name != 'nt': - # only change rpath in Release mode, since in Debug mode, ${FLUID_CORE_NAME}.xx is too large to be changed. - if "@APPLE@" == "1": - command = "install_name_tool -id \"@loader_path/../libs/\" ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so' - else: - command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so' - # The dynamic library compiled under aarch64 is greater than 64M, - # and an oversize error will be reported when using patchelf. - if platform.machine() != 'aarch64': - if os.system(command) != 0: - raise Exception( - "patch ${FLUID_CORE_NAME}.%s failed, command: %s" % - (ext_name, command)) - -ext_modules = [Extension('_foo', ['stub.cc'])] -if os.name == 'nt': - # fix the path separator under windows - fix_package_dir = {} - for k, v in package_dir.items(): - fix_package_dir[k] = v.replace('/', '\\') - package_dir = fix_package_dir - ext_modules = [] -elif sys.platform == 'darwin': - ext_modules = [] - - -def find_files(pattern, root): - for dirpath, _, files in os.walk(root): - for filename in fnmatch.filter(files, pattern): - yield os.path.join(dirpath, filename) - - -headers = ( - list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/framework')) + - list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/imperative')) + - list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/memory')) + - list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/platform')) + - list(find_files('*.h', '@PADDLE_SOURCE_DIR@/paddle/fluid/string')) + - list(find_files('*.pb.h', '${PADDLE_BINARY_DIR}/paddle/fluid/platform')) + - list(find_files('*.pb.h', '${PADDLE_BINARY_DIR}/paddle/fluid/framework')) + - list(find_files('*.pb', '${cudaerror_INCLUDE_DIR}')) - + # errorMessage.pb for errormessage - ['${EIGEN_INCLUDE_DIR}/Eigen/Core'] + # eigen - 
list(find_files('*', '${EIGEN_INCLUDE_DIR}/Eigen/src')) + # eigen - list(find_files('*', '${EIGEN_INCLUDE_DIR}/unsupported/Eigen')) + # eigen - list(find_files('*', '${GFLAGS_INSTALL_DIR}/include')) + # gflags - list(find_files('*', '${GLOG_INSTALL_DIR}/include')) + # glog - list(find_files('*', '${BOOST_INCLUDE_DIR}/boost')) + # boost - list(find_files('*', '${XXHASH_INSTALL_DIR}/include')) + # xxhash - list(find_files('*', '${PROTOBUF_INCLUDE_DIR}')) + # protobuf - list(find_files('*', '${DLPACK_INCLUDE_DIR}')) + # dlpack - list(find_files('*.h', '${THREADPOOL_INCLUDE_DIR}'))) # threadpool - -if '${WITH_MKLDNN}' == 'ON': - headers += list(find_files('*', '${MKLDNN_INSTALL_DIR}/include')) # mkldnn - -if '${WITH_GPU}' == 'ON': - headers += list(find_files( - '*.pb', '${cudaerror_INCLUDE_DIR}')) # errorMessage.pb for errormessage - - -class InstallCommand(InstallCommandBase): - def finalize_options(self): - ret = InstallCommandBase.finalize_options(self) - self.install_headers = os.path.join(self.install_purelib, 'paddle', - 'include') - self.install_lib = self.install_platlib - return ret - - -class InstallHeaders(Command): - """Override how headers are copied. 
- """ - description = 'install C/C++ header files' - - user_options = [ - ('install-dir=', 'd', 'directory to install header files to'), - ('force', 'f', 'force installation (overwrite existing files)'), - ] - - boolean_options = ['force'] - - def initialize_options(self): - self.install_dir = None - self.force = 0 - self.outfiles = [] - - def finalize_options(self): - self.set_undefined_options( - 'install', ('install_headers', 'install_dir'), ('force', 'force')) - - def mkdir_and_copy_file(self, header): - if 'pb.h' in header: - install_dir = re.sub('${PADDLE_BINARY_DIR}/', '', header) - elif 'third_party' not in header: - # framework - install_dir = re.sub('@PADDLE_SOURCE_DIR@/', '', header) - else: - # third_party - install_dir = re.sub('${THIRD_PARTY_PATH}', 'third_party', header) - patterns = [ - 'eigen3/src/extern_eigen3', 'boost/src/extern_boost', - 'dlpack/src/extern_dlpack/include', 'install/protobuf/include', - 'install/gflags/include', 'install/glog/include', - 'install/xxhash/include', 'install/mkldnn/include', - 'threadpool/src/extern_threadpool' - ] - for pattern in patterns: - install_dir = re.sub(pattern, '', install_dir) - install_dir = os.path.join(self.install_dir, - os.path.dirname(install_dir)) - if not os.path.exists(install_dir): - self.mkpath(install_dir) - return self.copy_file(header, install_dir) - - def run(self): - # only copy third_party/cudaErrorMessage.pb for cudaErrorMessage on mac or windows - if os.name == 'nt' or sys.platform == 'darwin': - if '${WITH_GPU}' == 'ON': - self.mkdir_and_copy_file( - '${cudaerror_INCLUDE_DIR}/cudaErrorMessage.pb') - return - hdrs = self.distribution.headers - if not hdrs: - return - self.mkpath(self.install_dir) - for header in hdrs: - (out, _) = self.mkdir_and_copy_file(header) - self.outfiles.append(out) - - def get_inputs(self): - return self.distribution.headers or [] - - def get_outputs(self): - return self.outfiles - - -# we redirect setuptools log for non-windows -if sys.platform != 'win32': - 
- @contextmanager - def redirect_stdout(): - f_log = open('${SETUP_LOG_FILE}', 'w') - origin_stdout = sys.stdout - sys.stdout = f_log - yield - f_log = sys.stdout - sys.stdout = origin_stdout - f_log.close() -else: - - @contextmanager - def redirect_stdout(): - yield - - -if '${WITH_GPU}' == 'ON': - os.environ['PACKAGE_NAME'] = "paddlepaddle-gpu" -else: - os.environ['PACKAGE_NAME'] = "paddlepaddle" - -with redirect_stdout(): - setup( - name='${PACKAGE_NAME}', - version='${PADDLE_VERSION}', - description='Parallel Distributed Deep Learning', - install_requires=setup_requires, - packages=packages, - ext_modules=ext_modules, - package_data=package_data, - package_dir=package_dir, - scripts=paddle_bins, - distclass=BinaryDistribution, - headers=headers, - cmdclass={ - 'install_headers': InstallHeaders, - 'install': InstallCommand, - }, - entry_points={ - 'console_scripts': - ['fleetrun = paddle.distributed.fleet.launch:launch'] - }) - -# As there are a lot of files in purelib which causes many logs, -# we don't print them on the screen, and you can open `setup.py.log` -# for the full logs. 
-if os.path.exists('${SETUP_LOG_FILE}'): - os.system('grep -v "purelib" ${SETUP_LOG_FILE}') -- GitLab From 669efb98de55c617387603acebf875a669432706 Mon Sep 17 00:00:00 2001 From: LutaoChu <30695251+LutaoChu@users.noreply.github.com> Date: Mon, 21 Sep 2020 16:10:26 +0800 Subject: [PATCH 018/117] Fix bug: shapes of Topk outputs are wrong when the parameter k is Tensor Fix bug: shapes of Topk outputs are wrong when the parameter k is Tensor --- paddle/fluid/operators/top_k_v2_op.cc | 15 ++++++++++++--- .../fluid/tests/unittests/test_top_k_v2_op.py | 19 +++++++++++++++---- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/operators/top_k_v2_op.cc b/paddle/fluid/operators/top_k_v2_op.cc index cc72d83411f..0e3fcced19e 100644 --- a/paddle/fluid/operators/top_k_v2_op.cc +++ b/paddle/fluid/operators/top_k_v2_op.cc @@ -32,7 +32,6 @@ class TopkV2Op : public framework::OperatorWithKernel { auto input_dims = ctx->GetInputDim("X"); const int& dim_size = input_dims.size(); - const int k = static_cast(ctx->Attrs().Get("k")); int axis = static_cast(ctx->Attrs().Get("axis")); PADDLE_ENFORCE_EQ((axis < dim_size) && (axis >= (-1 * dim_size)), true, "the axis of topk" @@ -41,8 +40,18 @@ class TopkV2Op : public framework::OperatorWithKernel { if (axis < 0) axis += dim_size; - PADDLE_ENFORCE_GE( - k, 1, "the attribute of k in the topk must >= 1, but received %d .", k); + int k; + auto k_is_tensor = ctx->HasInput("K"); + if (k_is_tensor) { + k = -1; + } else { + k = static_cast(ctx->Attrs().Get("k")); + PADDLE_ENFORCE_EQ(k >= 1, true, + "the attribute of k in the topk must >= 1 or be a " + "Tensor, but received %d .", + k); + } + PADDLE_ENFORCE_GE(input_dims.size(), 1, "input of topk must have >= 1d shape"); diff --git a/python/paddle/fluid/tests/unittests/test_top_k_v2_op.py b/python/paddle/fluid/tests/unittests/test_top_k_v2_op.py index 54e7765c0fb..b9d96f329b5 100644 --- a/python/paddle/fluid/tests/unittests/test_top_k_v2_op.py +++ 
b/python/paddle/fluid/tests/unittests/test_top_k_v2_op.py @@ -63,28 +63,28 @@ class TestTopkOp(OpTest): self.check_grad(set(['X']), 'Out') -class TestTopOp1(TestTopkOp): +class TestTopkOp1(TestTopkOp): def init_args(self): self.k = 3 self.axis = 0 self.largest = True -class TestTopOp2(TestTopkOp): +class TestTopkOp2(TestTopkOp): def init_args(self): self.k = 3 self.axis = 0 self.largest = False -class TestTopOp3(TestTopkOp): +class TestTopkOp3(TestTopkOp): def init_args(self): self.k = 4 self.axis = 0 self.largest = False -class TestTopOp4(TestTopkOp): +class TestTopkOp4(TestTopkOp): def init_args(self): self.k = 4 self.axis = 0 @@ -189,6 +189,8 @@ class TestTopKAPI(unittest.TestCase): result1 = paddle.topk(input_tensor, k=2) result2 = paddle.topk(input_tensor, k=2, axis=-1) result3 = paddle.topk(input_tensor, k=k_tensor, axis=1) + self.assertEqual(result3[0].shape, (6, -1, 8)) + self.assertEqual(result3[1].shape, (6, -1, 8)) result4 = paddle.topk(input_tensor, k=2, axis=1, largest=False) result5 = paddle.topk(input_tensor, k=2, axis=-1, largest=False) result6 = paddle.topk(large_input_tensor, k=1, axis=-1) @@ -239,6 +241,15 @@ class TestTopKAPI(unittest.TestCase): self.run_dygraph(place) self.run_static(place) + def test_errors(self): + paddle.disable_static() + x = paddle.to_tensor([1, 2, 3]) + with self.assertRaises(BaseException): + paddle.topk(x, k=-1) + + with self.assertRaises(BaseException): + paddle.topk(x, k=0) + if __name__ == "__main__": unittest.main() -- GitLab From aba759ba16422abf8cd39ae7e19d24f5997b9ade Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Mon, 21 Sep 2020 17:55:29 +0800 Subject: [PATCH 019/117] [Feature] Enhance inplace addto strategy for gradient accumulation in static graph (#27112) * support use add instead of sum to do gradient accumulation * add inplace addto pass * add grad_add op and inplace addto pass * remove debug code * code refine * fix bug when sereral sum ops inserts at same op_idx * fix Flags type * add addto attribute for 
conv3d * fix ut * code clean * fix type --- paddle/fluid/framework/details/CMakeLists.txt | 1 + .../fluid/framework/details/build_strategy.h | 4 + .../fluid/framework/details/op_handle_base.cc | 7 + .../fluid/framework/details/op_handle_base.h | 6 + .../details/share_tensor_buffer_functor.cc | 12 +- .../details/share_tensor_buffer_functor.h | 10 +- .../details/share_tensor_buffer_op_handle.cc | 9 +- .../details/share_tensor_buffer_op_handle.h | 5 +- .../ir/memory_optimize_pass/CMakeLists.txt | 2 + .../buffer_shared_inplace_op_pass.cc | 6 +- .../inplace_addto_op_pass.cc | 221 ++++++++++++++++++ .../memory_optimize_pass/memory_reuse_pass.cc | 11 +- .../memory_optimize_pass/memory_reuse_pass.h | 14 +- paddle/fluid/framework/operator.h | 8 + paddle/fluid/framework/parallel_executor.cc | 19 ++ paddle/fluid/operators/conv_cudnn_op.cu | 27 ++- paddle/fluid/operators/conv_op.cc | 10 + .../elementwise/elementwise_add_op.cc | 18 ++ .../elementwise/elementwise_add_op.cu | 7 + paddle/fluid/platform/flags.cc | 15 ++ .../pybind/global_value_getter_setter.cc | 3 +- paddle/fluid/pybind/pybind.cc | 6 + python/paddle/fluid/__init__.py | 1 + python/paddle/fluid/backward.py | 94 ++++++-- .../unittests/test_inplace_addto_strategy.py | 114 +++++++++ 25 files changed, 589 insertions(+), 41 deletions(-) create mode 100644 paddle/fluid/framework/ir/memory_optimize_pass/inplace_addto_op_pass.cc create mode 100644 python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index a3cc4d1721e..8281ec21438 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -74,6 +74,7 @@ set(SSA_GRAPH_EXECUTOR_DEPS graph framework_proto eager_deletion_pass buffer_shared_inplace_op_pass buffer_shared_cross_op_memory_reuse_pass + inplace_addto_op_pass set_reader_device_info_utils add_reader_dependency_pass) cc_library(ssa_graph_executor 
SRCS ssa_graph_executor.cc DEPS ${SSA_GRAPH_EXECUTOR_DEPS}) diff --git a/paddle/fluid/framework/details/build_strategy.h b/paddle/fluid/framework/details/build_strategy.h index 5388df6bc50..01d496d4ea7 100644 --- a/paddle/fluid/framework/details/build_strategy.h +++ b/paddle/fluid/framework/details/build_strategy.h @@ -19,6 +19,7 @@ #include #include #include + #include "boost/optional.hpp" #include "paddle/fluid/framework/ir/pass_builder.h" #include "paddle/fluid/framework/program_desc.h" @@ -119,6 +120,9 @@ struct BuildStrategy { // Turn on inplace by default. bool enable_inplace_{true}; + // Turn off inplace addto by default. + bool enable_addto_{false}; + // FIXME(zcd): is_distribution_ is a temporary field, because in pserver mode, // num_trainers is 1, so the current fields of build_strategy doesn't tell if // it's distributed model. diff --git a/paddle/fluid/framework/details/op_handle_base.cc b/paddle/fluid/framework/details/op_handle_base.cc index 35fe5d631fb..459bcff5c0b 100644 --- a/paddle/fluid/framework/details/op_handle_base.cc +++ b/paddle/fluid/framework/details/op_handle_base.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
#include "paddle/fluid/framework/details/op_handle_base.h" + #include #include @@ -88,6 +89,12 @@ void OpHandleBase::Run(bool use_cuda) { PADDLE_ENFORCE(!use_cuda); #endif + // skip running current op, used with inplace_addto_op_pass + if (skip_running_) { + VLOG(4) << "skip running: " << Name(); + return; + } + RunImpl(); } diff --git a/paddle/fluid/framework/details/op_handle_base.h b/paddle/fluid/framework/details/op_handle_base.h index c5aa1295aad..097f54d5d58 100644 --- a/paddle/fluid/framework/details/op_handle_base.h +++ b/paddle/fluid/framework/details/op_handle_base.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/framework/details/var_handle.h" #include "paddle/fluid/framework/ir/node.h" #include "paddle/fluid/platform/device_context.h" @@ -52,6 +53,10 @@ class OpHandleBase { virtual Priority GetPriority() const { return kNormal; } + virtual bool GetSkipRunning() const { return skip_running_; } + + virtual void SetSkipRunning(bool skip_runing) { skip_running_ = skip_runing; } + virtual std::string Name() const = 0; void Run(bool use_cuda); @@ -131,6 +136,7 @@ class OpHandleBase { std::map dev_ctxes_; std::vector local_exec_scopes_; + bool skip_running_ = false; #ifdef PADDLE_WITH_CUDA std::unordered_map events_; diff --git a/paddle/fluid/framework/details/share_tensor_buffer_functor.cc b/paddle/fluid/framework/details/share_tensor_buffer_functor.cc index 19f075018ce..5fbaf3cbfe0 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_functor.cc +++ b/paddle/fluid/framework/details/share_tensor_buffer_functor.cc @@ -48,12 +48,13 @@ static inline Tensor *GetMutableTensorFromVar(Variable *var) { ShareTensorBufferFunctor::ShareTensorBufferFunctor( Scope *scope, size_t scope_idx, const std::string &op_type, const std::vector &in_var_infos, - const std::vector &out_var_names) + const std::vector &out_var_names, bool share_dims) : scope_(scope), scope_idx_(scope_idx), op_type_(op_type), in_var_infos_(in_var_infos), - 
out_var_names_(out_var_names) { + out_var_names_(out_var_names), + share_dims_(share_dims) { PADDLE_ENFORCE_EQ(in_var_infos_.size(), out_var_names_.size(), platform::errors::PreconditionNotMet( "The number of input variables and output variables " @@ -151,6 +152,13 @@ void ShareTensorBufferFunctor::operator()(Scope *exec_scope) { } else { out_tensor->ShareBufferWith(in_tensor); + // NOTE(zhiqiu): In the case of inplace addto, if the operator of + // the in_out_vars is skipped during running, we should set the dims of + // output as the same as input. + if (share_dims_) { + out_tensor->Resize(in_tensor.dims()); + } + VLOG(2) << "Share tensor buffer when running " << op_type_ << " : " << in_var_info->Name() << " -> " << out_var_names_[i]; } diff --git a/paddle/fluid/framework/details/share_tensor_buffer_functor.h b/paddle/fluid/framework/details/share_tensor_buffer_functor.h index 774dcd056e5..be49d1c432b 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_functor.h +++ b/paddle/fluid/framework/details/share_tensor_buffer_functor.h @@ -19,6 +19,7 @@ #include #include #include + #include "paddle/fluid/framework/details/op_handle_base.h" #include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h" #include "paddle/fluid/framework/scope.h" @@ -40,11 +41,13 @@ class ShareTensorBufferFunctor { ShareTensorBufferFunctor( Scope *scope, size_t scope_idx, const std::string &op_type, const std::vector &in_var_infos, - const std::vector &out_var_names); + const std::vector &out_var_names, bool share_dims = false); void AddReuseVarPair(const ir::MemOptVarInfo *in_var_info, const std::string &out_var_name); + void SetShareDims(bool share_dims) { share_dims_ = share_dims; } + void operator()(Scope *exec_scope); std::unordered_map ReusedVars() const; @@ -66,6 +69,11 @@ class ShareTensorBufferFunctor { std::vector out_var_names_; std::vector> in_out_vars_; + + // NOTE(zhiqiu): In the case of inplace addto, if the operator of + // the 
in_out_vars is skipped during running, we should set the dims of output + // as the same as input. + bool share_dims_{false}; }; } // namespace details diff --git a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc index b805ad3b072..be3f5515a97 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc +++ b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc @@ -59,9 +59,10 @@ ComputationOpHandle *GetUniquePendingComputationOpHandle( ShareTensorBufferOpHandle::ShareTensorBufferOpHandle( ir::Node *node, Scope *scope, size_t scope_idx, const std::string &op_type, const std::vector &in_var_infos, - const std::vector &out_var_names) + const std::vector &out_var_names, bool share_dims) : OpHandleBase(node), - functor_(scope, scope_idx, op_type, in_var_infos, out_var_names) {} + functor_(scope, scope_idx, op_type, in_var_infos, out_var_names, + share_dims) {} std::unordered_map ShareTensorBufferOpHandle::ReusedVars() const { @@ -73,6 +74,10 @@ void ShareTensorBufferOpHandle::AddReuseVarPair( functor_.AddReuseVarPair(in_var_info, out_var_name); } +void ShareTensorBufferOpHandle::SetShareDims(bool share_dims) { + functor_.SetShareDims(share_dims); +} + void ShareTensorBufferOpHandle::InitCUDA() { #ifdef PADDLE_WITH_CUDA int dev_id = diff --git a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.h b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.h index b22f5621fe4..a02c346485e 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.h +++ b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.h @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/framework/details/computation_op_handle.h" #include "paddle/fluid/framework/details/op_handle_base.h" #include "paddle/fluid/framework/details/share_tensor_buffer_functor.h" @@ -31,7 +32,7 @@ class ShareTensorBufferOpHandle : public OpHandleBase { 
ir::Node *node, Scope *scope, size_t scope_idx, const std::string &op_type, const std::vector &in_vars_infos, - const std::vector &out_var_names); + const std::vector &out_var_names, bool share_dims = false); std::unordered_map ReusedVars() const; @@ -42,6 +43,8 @@ class ShareTensorBufferOpHandle : public OpHandleBase { void AddReuseVarPair(const ir::MemOptVarInfo *in_var_info, const std::string &out_var_name); + void SetShareDims(bool share_dims); + const ShareTensorBufferFunctor &Functor() const { return functor_; } protected: diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt index 726a2d90fcf..a8c0973cac4 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt +++ b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt @@ -13,4 +13,6 @@ cc_library(memory_reuse_pass SRCS memory_reuse_pass.cc DEPS computation_op_handl cc_library(buffer_shared_inplace_op_pass SRCS buffer_shared_inplace_op_pass.cc DEPS memory_reuse_pass) cc_library(buffer_shared_cross_op_memory_reuse_pass SRCS buffer_shared_cross_op_memory_reuse_pass.cc DEPS memory_reuse_pass) +cc_library(inplace_addto_op_pass SRCS inplace_addto_op_pass.cc DEPS memory_reuse_pass) + cc_test(test_reference_count_pass_last_lived_ops SRCS test_reference_count_pass_last_lived_ops.cc DEPS parallel_executor elementwise_mul_op elementwise_add_op scale_op) diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc index 0b42f2ebd55..ce7f27d2755 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc @@ -16,6 +16,7 @@ #include #include #include + #include "paddle/fluid/framework/details/computation_op_handle.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" 
#include "paddle/fluid/framework/details/share_tensor_buffer_op_handle.h" @@ -141,11 +142,12 @@ void BufferSharedInplaceOpPass::Run(Graph *graph) const { VLOG(4) << "Inplace performed in op " << op_type << ": " << in_var_handle_ptr->Name() << " -> " << out_var_handle_ptr->Name() - << ". Debug String is: " << op->GetOp()->DebugString(); + << ". Debug String is: " << op->GetOp()->DebugString() + << ". ReuseType: " << ReuseType(); } else { VLOG(3) << "Inplace failed in op " << op_type << ": " << in_var_handle_ptr->Name() << " -> " - << out_var_handle_ptr->Name(); + << out_var_handle_ptr->Name() << ". ReuseType: " << ReuseType(); } } } diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/inplace_addto_op_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/inplace_addto_op_pass.cc new file mode 100644 index 00000000000..81c63f46bda --- /dev/null +++ b/paddle/fluid/framework/ir/memory_optimize_pass/inplace_addto_op_pass.cc @@ -0,0 +1,221 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include + +#include "paddle/fluid/framework/details/computation_op_handle.h" +#include "paddle/fluid/framework/details/multi_devices_helper.h" +#include "paddle/fluid/framework/details/share_tensor_buffer_op_handle.h" +#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h" +#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.h" +#include "paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.h" +#include "paddle/fluid/framework/ir/pass.h" + +namespace paddle { +namespace framework { +namespace ir { + +class InplaceAddToOpPass : public MemoryReusePass { + protected: + std::string ReuseType() const override { return "inplace_addto"; } + + void Run(Graph *graph) const override; + + private: + // 1. Add last living op of in_var, add any last living op of out_var + // 2. Set reference count of in_var to be 2 + void UpdateLastLiveOpOfVar(details::ComputationOpHandle *op, + details::VarHandle *in_var, + details::VarHandle *out_var) const override { + size_t scope_idx = op->GetScopeIdx(); + auto *last_live_ops_of_vars_ = + &Get>(kLastLiveOpsOfVars); + auto *var_infos_ = &(Get(kMemOptVarInfoMapList)); + auto out_var_op_iter = + (*last_live_ops_of_vars_)[scope_idx].find(out_var->Name()); + + // In Reduce mode, some output variable(gradient of parameter) does not have + // last live ops + details::ComputationOpHandle *last_live_op_of_in_var = nullptr; + if (out_var_op_iter == (*last_live_ops_of_vars_)[scope_idx].end()) { + last_live_op_of_in_var = op; + } else { + PADDLE_ENFORCE_EQ( + out_var_op_iter->second.ops().empty(), false, + platform::errors::InvalidArgument( + "Var(%s)'s last live op should not empty.", out_var->Name())); + last_live_op_of_in_var = *(out_var_op_iter->second.ops().begin()); + } + + auto *last_live_ops_of_in_var = + (*last_live_ops_of_vars_)[scope_idx][in_var->Name()].mutable_ops(); + // last_live_ops_of_in_var->clear(); + 
last_live_ops_of_in_var->insert(last_live_op_of_in_var); + + auto in_var_info_iter = (*var_infos_)[scope_idx].find(in_var->Name()); + PADDLE_ENFORCE_NE( + in_var_info_iter, (*var_infos_)[scope_idx].end(), + platform::errors::NotFound("Cannot find variable %s.", in_var->Name())); + + in_var_info_iter->second->SetRefCnt(2); // before inplace, it is 1 + } +}; + +void InplaceAddToOpPass::Run(Graph *graph) const { + const auto &last_live_ops = + Get>(kLastLiveOpsOfVars); + + bool use_cuda = Get(kUseCuda); + + // Currently, only perform InplaceAddToOpPass on cuda place + if (!use_cuda) { + return; + } + + // Step 1: Build a reverse map of last_live_ops + // i.e.: op -> vars + std::unordered_map> + candidate_ops; + for (auto &each_scope_ops : last_live_ops) { + for (auto &pair : each_scope_ops) { + // If variable has more than 1 last lived ops, this variable cannot + // be inplaced. + if (pair.second.ops().size() != 1) { + continue; + } + + auto *op = *(pair.second.ops().begin()); + const std::string &op_type = op->GetOp()->Type(); + const framework::OpDesc *op_desc = op->Node()->Op(); + PADDLE_ENFORCE_NOT_NULL( + op_desc, platform::errors::NotFound("Op(%s) can not find opdesc.", + op->Name())); + + // only grad op should be processed. + if (op_type != "grad_add") { + continue; + } + + const std::string &var_name = pair.first; + auto in_nodes = this->FindNodesByName(var_name, op->Node()->inputs); + if (in_nodes.size() == 1) { + candidate_ops[op][var_name] = *in_nodes.begin(); + } + VLOG(4) << "Find op " << op_type << " with input(" << var_name + << ") that can do inplace add to"; + } + } + + // Step 2: Check which vars can be inplaced indeed + for (auto &op_vars_pair : candidate_ops) { + auto *op = op_vars_pair.first; + + // The original gradient accumulation is g = sum(g_0, g_1,..., g_n), and it + // could be changed as follws if inplace addto is enabled: + // g_sum_0 = g_0 + // g_sum_1 = grad_add(g_sum_0, g_1) + // g_sum_2 = grad_add(g_sum_1, g_2) + // ... 
+ // g_sum_n = grad_add(g_sum_n-1, g_n) + + // here we will add inplace for each grad_add, for example, for the first + // grad_add, g_sum_0 -> g1, g_sum_1 -> g1, and set grad_add as skipped. + + const std::string &op_type = op->GetOp()->Type(); + + PADDLE_ENFORCE_EQ(op->Node()->inputs.size(), 2, + platform::errors::InvalidArgument( + "The size of inputs of %s should be 2, but got %d", + op_type, op->Node()->inputs.size())); + + PADDLE_ENFORCE_EQ(op->Node()->outputs.size(), 1, + platform::errors::InvalidArgument( + "The size of outputs of %s should be 1, but got %d", + op_type, op->Node()->outputs.size())); + + auto *left_var_ptr = dynamic_cast( + &(op->Node()->inputs[0]->Wrapper())); + auto *right_var_ptr = dynamic_cast( + &(op->Node()->inputs[1]->Wrapper())); + auto *out_var_ptr = dynamic_cast( + &(op->Node()->outputs[0]->Wrapper())); + + if (left_var_ptr == nullptr || right_var_ptr == nullptr || + out_var_ptr == nullptr) { + continue; + } + + // auto *left_generated_op = dynamic_cast( + // left_var_ptr->GeneratedOp()); + + auto *right_generated_op = dynamic_cast( + right_var_ptr->GeneratedOp()); + + auto *out_generated_op = dynamic_cast( + out_var_ptr->GeneratedOp()); + + // NOTE(zhiqiu): currently, only conv2d_grad supports addto strategy + if (right_generated_op->Name() != "conv2d_grad") { + continue; + } + + // NOTE(zhiqiu): Normally, if we inplace a->b, we should let a generated + // before b. However, in the situation of inplace addto, we do not care + // the order, since a+b is equal to b+a. Is there any exception for that? + + // AddDependencyVar(right_generated_op, left_generated_op); + // no need, as discussed above. 
+ + // step (a): inplace right_var->left_var of grad_add + + this->AddReuseVar(right_generated_op, left_var_ptr, right_var_ptr); + UpdateLastLiveOpOfVar(right_generated_op, left_var_ptr, right_var_ptr); + VLOG(4) << "Inplace performed in op " << right_generated_op->GetOp()->Type() + << ": " << left_var_ptr->Name() << " -> " << right_var_ptr->Name() + << ". Debug String is: " + << right_generated_op->GetOp()->DebugString() + << ". ReuseType: " << ReuseType(); + + // step (b): inplace out -> right_var of grad_add + + this->AddReuseVar(out_generated_op, right_var_ptr, out_var_ptr, true); + + VLOG(4) << "Inplace performed in op " << op_type << ": " + << left_var_ptr->Name() << " -> " << out_var_ptr->Name() + << ". Debug String is: " << op->GetOp()->DebugString() + << ". ReuseType: " << ReuseType(); + + // step (c): make right_var cannot inplace afterwards. canbe done + // aotomatically since CollectReusedVars is called before any reuse. + + // step (d): make right_var's generated op use addto + right_generated_op->GetOp()->SetAttr("use_addto", true); + + // step (e): make grad_add skip running + op->SetSkipRunning(true); + } +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(inplace_addto_op_pass, paddle::framework::ir::InplaceAddToOpPass) + .RequirePassAttr(paddle::framework::ir::kMemOptVarInfoMapList) + .RequirePassAttr(paddle::framework::ir::kLastLiveOpsOfVars) + .RequirePassAttr(paddle::framework::ir::kUseCuda); diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc index 221b0a76e7e..3e3b9864a7b 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.h" + #include #include #include @@ -73,6 +74,7 @@ bool MemoryReusePass::TryReuseVar(details::VarHandle *in_var, out_var->Name())); if (IsVarPairReusable(*in_var, *out_var)) { AddReuseVar(op, in_var, out_var); + UpdateLastLiveOpOfVar(op, in_var, out_var); return true; } else { return false; @@ -324,7 +326,8 @@ bool MemoryReusePass::IsVarPairReusable( void MemoryReusePass::AddReuseVar(details::ComputationOpHandle *op, details::VarHandle *in_var, - details::VarHandle *out_var) const { + details::VarHandle *out_var, + bool share_dims) const { PADDLE_ENFORCE_GT( (*var_infos_)[op->GetScopeIdx()].count(in_var->Name()), 0, platform::errors::NotFound("Var(%s) does not in mem opt var infos.", @@ -344,13 +347,15 @@ void MemoryReusePass::AddReuseVar(details::ComputationOpHandle *op, share_buffer_op->AddInput(in_var); } + if (share_dims) { + share_buffer_op->SetShareDims(true); + } + share_buffer_op->AddReuseVarPair( (*var_infos_)[op->GetScopeIdx()].at(in_var->Name()).get(), out_var->Name()); reused_in_var_names_[op->GetScopeIdx()].insert(in_var->Name()); reused_out_var_names_[op->GetScopeIdx()].insert(out_var->Name()); - - UpdateLastLiveOpOfVar(op, in_var, out_var); } // 1. 
Set last living op of in_var to be any last living op of out_var diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.h b/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.h index 82274419184..1c0c6ae6020 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.h +++ b/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/framework/details/computation_op_handle.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/details/share_tensor_buffer_op_handle.h" @@ -92,6 +93,12 @@ class MemoryReusePass : public Pass { int64_t GetMemorySize(const details::VarHandle &var) const; + void AddReuseVar(details::ComputationOpHandle *op, details::VarHandle *in_var, + details::VarHandle *out_var, bool share_dims = false) const; + virtual void UpdateLastLiveOpOfVar(details::ComputationOpHandle *op, + details::VarHandle *in_var, + details::VarHandle *out_var) const; + private: VarDesc *GetVarDesc(const details::VarHandle &var) const; @@ -109,13 +116,6 @@ class MemoryReusePass : public Pass { void CollectReusedVars() const; - void AddReuseVar(details::ComputationOpHandle *op, details::VarHandle *in_var, - details::VarHandle *out_var) const; - - void UpdateLastLiveOpOfVar(details::ComputationOpHandle *op, - details::VarHandle *in_var, - details::VarHandle *out_var) const; - private: mutable Graph *graph_; mutable bool use_cuda_; diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index ebecbf0498c..bd52d7ffef5 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -157,6 +157,14 @@ class OperatorBase { platform::errors::NotFound("(%s) is not found in AttributeMap.", name)); return BOOST_GET_CONST(T, attrs_.at(name)); } + void SetAttr(const std::string& name, const Attribute& v) { + PADDLE_ENFORCE_EQ( + HasAttr(name), true, + 
platform::errors::NotFound( + "The attribute %s is not found in operator %s", name, Type())); + + attrs_[name] = v; + } const AttributeMap& Attrs() const { return attrs_; } const VariableNameMap& Inputs() const { return inputs_; } diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 12e0f97f126..535ec9cd7d9 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -13,12 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/parallel_executor.h" + #include #include #include #include #include #include + #include "paddle/fluid/framework/details/async_ssa_graph_executor.h" #include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" @@ -108,6 +110,11 @@ class ParallelExecutorPrivate { * them. */ inline void SetSkipMemoryReuse(size_t scope_idx, const std::string &name) { + if (mem_opt_var_infos_.size() == 0) { + VLOG(4) << "The mem_opt_var_infos_ is empty, maybe no memory " + "optimization strategy is enabled"; + return; + } auto iter = mem_opt_var_infos_[scope_idx].find(name); if (iter != mem_opt_var_infos_[scope_idx].end()) { iter->second->SetSkipMemoryReuse(true); @@ -308,6 +315,7 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) { } bool need_mem_opt = build_strategy_.enable_inplace_ || + build_strategy_.enable_addto_ || build_strategy_.memory_optimize_.get() || is_gc_enabled; if (!need_mem_opt) return graph; @@ -320,6 +328,16 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) { graph = ref_cnt_pass->Apply(graph); VLOG(10) << "ReferenceCountPass Applied"; + if (build_strategy_.enable_addto_) { + auto addto_pass = ir::PassRegistry::Instance().Get("inplace_addto_op_pass"); + addto_pass->SetNotOwned(ir::kMemOptVarInfoMapList, 
&mem_opt_var_infos_); + addto_pass->SetNotOwned(ir::kLastLiveOpsOfVars, &last_live_ops_of_vars); + addto_pass->SetNotOwned(ir::kUseCuda, &use_cuda_); + VLOG(10) << "Start to apply inplace_addto_op_pass"; + graph = addto_pass->Apply(graph); + VLOG(10) << "inplace_addto_op_pass Applied"; + } + if (build_strategy_.enable_inplace_) { auto inplace_pass = ir::PassRegistry::Instance().Get("buffer_shared_inplace_pass"); @@ -1068,3 +1086,4 @@ USE_PASS(reference_count_pass); USE_PASS(eager_deletion_pass); USE_PASS(buffer_shared_inplace_pass); USE_PASS(buffer_shared_cross_op_memory_reuse_pass); +USE_PASS(inplace_addto_op_pass); diff --git a/paddle/fluid/operators/conv_cudnn_op.cu b/paddle/fluid/operators/conv_cudnn_op.cu index 7f705755915..00af724ac7f 100644 --- a/paddle/fluid/operators/conv_cudnn_op.cu +++ b/paddle/fluid/operators/conv_cudnn_op.cu @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" @@ -287,7 +288,9 @@ class CUDNNConvOpKernel : public framework::OpKernel { #endif // ------------------- cudnn conv forward --------------------- - ScalingParamType alpha = 1.0f, beta = 0.0f; + ScalingParamType alpha = 1.0f; + ScalingParamType beta = ctx.Attr("use_addto") ? 1.0f : 0.0f; + VLOG(4) << "Conv: use_addto = " << ctx.Attr("use_addto"); for (int i = 0; i < groups; i++) { workspace_handle.RunFunc( [&](void* workspace_ptr) { @@ -609,9 +612,13 @@ class CUDNNConvGradOpKernel : public framework::OpKernel { } // ------------------- cudnn conv backward data --------------------- - ScalingParamType alpha = 1.0f, beta = 0.0f; + ScalingParamType alpha = 1.0f; + ScalingParamType beta = ctx.Attr("use_addto") ? 1.0f : 0.0f; + VLOG(4) << "Conv_grad: use_addto = " << ctx.Attr("use_addto"); + if (input_grad) { - // Because beta is zero, it is unnecessary to reset input_grad. + // When beta is 0, it is unnecessary to reset input_grad. 
+ // When beta is 1, the output cannot be reset since addt strategy used. for (int i = 0; i < groups; i++) { workspace_handle.RunFunc( [&](void* cudnn_workspace_ptr) { @@ -653,6 +660,9 @@ class CUDNNConvGradOpKernel : public framework::OpKernel { ctx, &transformed_input_grad_channel, input_grad); } } + + // filter_grad do not use inplace addto. + ScalingParamType beta_filter = 0.0f; // ------------------- cudnn conv backward filter --------------------- if (filter_grad) { // Because beta is zero, it is unnecessary to reset filter_grad. @@ -665,7 +675,7 @@ class CUDNNConvGradOpKernel : public framework::OpKernel { input_data + i * group_offset_in, args2.odesc.desc(), output_grad_data + i * group_offset_out, args2.cdesc.desc(), filter_algo, cudnn_workspace_ptr, - workspace_size, &beta, args2.wdesc.desc(), + workspace_size, &beta_filter, args2.wdesc.desc(), filter_grad_data + i * group_offset_filter)); }, workspace_size); @@ -1017,7 +1027,14 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel { int group_offset_out = o_c / groups * o_h * o_w * o_d; int group_offset_filter = W->numel() / groups; - ScalingParamType alpha = 1.0f, beta = 0.0f; + ScalingParamType alpha = 1.0f; + ScalingParamType beta = 0.0f; + + // NOTE(zhiqiu): inplace addto is not supportted in double grad yet. + // ScalingParamType beta = ctx.Attr("use_addto") ? 
1.0f : + // 0.0f; + // VLOG(4) << "Conv_grad_grad: use_addto = " << ctx.Attr("use_addto"); + auto wkspace_handle = dev_ctx.cudnn_workspace_handle(); if (ddO) { diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc index 9ed169fe350..bf97b9d03c4 100644 --- a/paddle/fluid/operators/conv_op.cc +++ b/paddle/fluid/operators/conv_op.cc @@ -305,6 +305,11 @@ void Conv2DOpMaker::Make() { .SetDefault(0.0f); AddAttr("fuse_beta", "(float, default 0.0) Only used in mkldnn kernel") .SetDefault(0.0f); + AddAttr( + "use_addto", + "(bool, default false) If use addto strategy or not, only used in " + "cudnn kernel") + .SetDefault(false); AddAttr("fuse_residual_connection", "(bool, default false) Only used in mkldnn kernel. Used " "whenever convolution output is as an input to residual " @@ -460,6 +465,11 @@ void Conv3DOpMaker::Make() { .SetDefault(0.0f); AddAttr("fuse_beta", "(float, default 0.0) Only used in mkldnn kernel") .SetDefault(0.0f); + AddAttr( + "use_addto", + "(bool, default false) If use addto strategy or not, only used in " + "cudnn kernel") + .SetDefault(false); AddAttr("fuse_residual_connection", "(bool, default false) Only used in mkldnn kernel. Used " "whenever convolution output is as an input to residual " diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.cc b/paddle/fluid/operators/elementwise/elementwise_add_op.cc index 534a19bd94a..97624944ca1 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_add_op.cc @@ -13,8 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/elementwise/elementwise_add_op.h" + #include #include + +#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/operators/elementwise/elementwise_op.h" namespace paddle { @@ -129,3 +132,18 @@ REGISTER_OP_CPU_KERNEL( int>, ops::ElementwiseAddDoubleGradKernel); + +// A specialization elementwise_add operator, used in gradient accumulation with +// inplace addto. +REGISTER_OPERATOR( + grad_add, paddle::operators::ElementwiseOp, + paddle::operators::ElementwiseAddOpMaker, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker); + +REGISTER_OP_CPU_KERNEL( + grad_add, + ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel); diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.cu b/paddle/fluid/operators/elementwise/elementwise_add_op.cu index 71019872802..a4cbd14388b 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op.cu +++ b/paddle/fluid/operators/elementwise/elementwise_add_op.cu @@ -111,3 +111,10 @@ REGISTER_OP_CUDA_KERNEL( ops::ElementwiseAddDoubleGradKernel, ops::ElementwiseAddDoubleGradKernel); + +REGISTER_OP_CUDA_KERNEL( + grad_add, ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel, + ops::ElementwiseAddKernel); diff --git a/paddle/fluid/platform/flags.cc b/paddle/fluid/platform/flags.cc index af8798a4b7c..9116edd01b0 100644 --- a/paddle/fluid/platform/flags.cc +++ b/paddle/fluid/platform/flags.cc @@ -521,3 +521,18 @@ DEFINE_int32( DEFINE_bool(sort_sum_gradient, false, "Sum gradients by the reverse order of " "the forward execution sequence."); + +/** + * Performance related FLAG + * Name: max_inplace_grad_add + * Since Version: 2.0.0 + * Value Range: int32, default=0 + * Example: + * Note: The maximum number of inplace grad_add. + */ +DEFINE_int32( + max_inplace_grad_add, 0, + "The maximum number of inplace grad_add. 
When doing " + "gradient accumulation, if the number of gradients need to that " + "less FLAGS_max_inplace_grad_add, than it will be use several grad_add" + "instead of sum. Default is 0."); diff --git a/paddle/fluid/pybind/global_value_getter_setter.cc b/paddle/fluid/pybind/global_value_getter_setter.cc index 318178d5eb9..894740e25c0 100644 --- a/paddle/fluid/pybind/global_value_getter_setter.cc +++ b/paddle/fluid/pybind/global_value_getter_setter.cc @@ -62,6 +62,7 @@ DECLARE_bool(use_system_allocator); // others DECLARE_bool(benchmark); DECLARE_int32(inner_op_parallelism); +DECLARE_int32(max_inplace_grad_add); DECLARE_string(tracer_profile_fname); #ifdef PADDLE_WITH_CUDA // cudnn @@ -348,7 +349,7 @@ static void RegisterGlobalVarGetterSetter() { FLAGS_init_allocated_mem, FLAGS_initial_cpu_memory_in_mb, FLAGS_memory_fraction_of_eager_deletion, FLAGS_use_pinned_memory, FLAGS_benchmark, FLAGS_inner_op_parallelism, FLAGS_tracer_profile_fname, - FLAGS_paddle_num_threads, FLAGS_use_mkldnn); + FLAGS_paddle_num_threads, FLAGS_use_mkldnn, FLAGS_max_inplace_grad_add); #ifdef PADDLE_WITH_CUDA REGISTER_PUBLIC_GLOBAL_VAR( diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 330254ecaaf..04087cb241c 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include + #include #include #include @@ -22,6 +23,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/framework/feed_fetch_type.h" @@ -2528,6 +2530,10 @@ All parameter, weight, gradient are variables in Paddle. 
"enable_inplace", [](const BuildStrategy &self) { return self.enable_inplace_; }, [](BuildStrategy &self, bool b) { self.enable_inplace_ = b; }) + .def_property( + "enable_addto", + [](const BuildStrategy &self) { return self.enable_addto_; }, + [](BuildStrategy &self, bool b) { self.enable_addto_ = b; }) .def_property( "fuse_all_reduce_ops", [](const BuildStrategy &self) { diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index 9f748b7956f..e8cc6ce9901 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -197,6 +197,7 @@ def __bootstrap__(): 'free_when_no_cache_hit', 'call_stack_level', 'sort_sum_gradient', + 'max_inplace_grad_add', ] if 'Darwin' not in sysstr: read_env_flags.append('use_pinned_memory') diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index d51cacd1a5c..478fecf74e4 100644 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -251,12 +251,19 @@ def _rename_arg_(op_descs, old_name, new_name, begin_idx=None, end_idx=None): begin_idx = 0 if end_idx is None: end_idx = len(op_descs) - for i in range(begin_idx, end_idx): - op_desc = op_descs[i] - if isinstance(op_desc, tuple): - op_desc = op_desc[0] - op_desc._rename_input(old_name, new_name) - op_desc._rename_output(old_name, new_name) + if isinstance(op_descs, (list, tuple)): + for i in range(begin_idx, end_idx): + op_desc = op_descs[i] + if isinstance(op_desc, tuple): + op_desc = op_desc[0] + op_desc._rename_input(old_name, new_name) + op_desc._rename_output(old_name, new_name) + if isinstance(op_descs, collections.OrderedDict): + for key, value in op_descs.items(): + if isinstance(value, (list, tuple)): + for op_desc in value: + op_desc._rename_input(old_name, new_name) + op_desc._rename_output(old_name, new_name) def _create_op_desc_(op_type, inputs, outputs, attrs): @@ -369,6 +376,41 @@ def _append_grad_suffix_(name): return cpt.to_text(name) + core.grad_var_suffix() +def 
_accumulate_gradients_by_sum_op_(var_name, renamed_vars, pending_sum_ops, + op_idx): + """ + Use sum op to accumulate_gradients, the gradients are stored in renamed_vars. + """ + if op_idx not in pending_sum_ops.keys(): + pending_sum_ops[op_idx] = [] + pending_sum_ops[op_idx].append( + _create_op_desc_("sum", {"X": renamed_vars[var_name]}, + {"Out": [var_name]}, {"use_mkldnn": False})) + renamed_vars[var_name] = [var_name] + + +def _accumulate_gradients_by_add_ops_(var_name, renamed_vars, pending_sum_ops, + op_idx): + """ + Use several inplace add op to accumulate_gradients, the gradients are stored in renamed_vars. + """ + if op_idx not in pending_sum_ops.keys(): + pending_sum_ops[op_idx] = [] + out_name = renamed_vars[var_name][0] + for i in range(1, len(renamed_vars[var_name])): + x_name = out_name + y_name = renamed_vars[var_name][i] + if i != len(renamed_vars[var_name]) - 1: + out_name = var_name + '@ADD@' + str(i) + else: + out_name = var_name + pending_sum_ops[op_idx].append( + _create_op_desc_("grad_add", {"X": [x_name], + "Y": [y_name]}, {"Out": [out_name]}, + {"use_mkldnn": False})) + renamed_vars[var_name] = [var_name] + + def _addup_repetitive_outputs_(op_descs, block_idx): """ In backward part, an variable may be the output of more than one ops. @@ -376,7 +418,9 @@ def _addup_repetitive_outputs_(op_descs, block_idx): In these cases, the variable should be the accumulation of all the outputs. `sum_op`s are added to implement the accumulate. 
""" - pending_sum_ops = [] + _MAX_ADD_NUM_ = core.globals()['FLAGS_max_inplace_grad_add'] + #pending_sum_ops = [] + pending_sum_ops = collections.OrderedDict() var_rename_count = collections.defaultdict(int) renamed_vars = collections.defaultdict(list) renamed_var_start_idx = collections.defaultdict(list) @@ -385,10 +429,13 @@ def _addup_repetitive_outputs_(op_descs, block_idx): if "@GRAD" not in var_name: continue if len(renamed_vars[var_name]) > 1: - pending_sum_ops.append((_create_op_desc_( - "sum", {"X": renamed_vars[var_name]}, {"Out": [var_name]}, - {"use_mkldnn": False}), idx)) - renamed_vars[var_name] = [var_name] + if len(renamed_vars[var_name]) > _MAX_ADD_NUM_: + _accumulate_gradients_by_sum_op_(var_name, renamed_vars, + pending_sum_ops, idx) + else: + _accumulate_gradients_by_add_ops_(var_name, renamed_vars, + pending_sum_ops, idx) + for param_idx, param_name in enumerate(op_desc.output_names()): arg_names = op_desc.output(param_name) for arg_idx, var_name in enumerate(arg_names): @@ -440,13 +487,26 @@ def _addup_repetitive_outputs_(op_descs, block_idx): renamed_vars[var_name].append(new_name) for var_name, inputs in six.iteritems(renamed_vars): - if len(inputs) > 1: - pending_sum_ops.append( - (_create_op_desc_("sum", {"X": inputs}, {"Out": [var_name]}, - {"use_mkldnn": False}), len(op_descs))) + if len(renamed_vars[var_name]) > 1: + if len(renamed_vars[var_name]) > _MAX_ADD_NUM_: + _accumulate_gradients_by_sum_op_(var_name, renamed_vars, + pending_sum_ops, len(op_descs)) + else: + _accumulate_gradients_by_add_ops_(var_name, renamed_vars, + pending_sum_ops, + len(op_descs)) + # sum_op descs are sorted according to their insert position - for p in reversed(pending_sum_ops): - op_descs.insert(p[1], p[0]) + for key, value in collections.OrderedDict( + reversed(list(pending_sum_ops.items()))).items(): + + # NOTE(zhiqiu): Since reversed, the idx of op_descs to be inserted will remains correct. 
+ # For example, [0, 1, 2], and we want to insert 'a' at idx 1, 'b' at idx 2, and the expected result is [0, 1, 'a', 2, 'b']. + # If reversed, we first insert 'b' at idx 2, it becomes [0, 1, 2, 'b'], and then insert 'a' at idx 1, it becomes [0, 1, 'a', 2, 'b']. + # If not reverse, we first insert 'a' at idx 1, it becomes [0, 1, 'a', 2], and then insert 'b' at idx 2, it becomes [0, 1, 'a', 'b', 2]. + idx = key + for i, op in enumerate(value): + op_descs.insert(idx + i, op) return op_descs diff --git a/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py b/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py new file mode 100644 index 00000000000..c75acd7c15b --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_inplace_addto_strategy.py @@ -0,0 +1,114 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import unittest + +import paddle +import paddle.fluid as fluid +import paddle.fluid.layers as layers +from paddle.fluid.backward import calc_gradient +import numpy as np + + +class ConvBNLayer(fluid.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + use_cudnn=False): + super(ConvBNLayer, self).__init__() + + self._conv = fluid.dygraph.Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + bias_attr=False, + use_cudnn=use_cudnn) + + self._batch_norm = fluid.dygraph.BatchNorm(num_filters, act=act) + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + return y + + +def create_program(): + main = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(main, startup): + x = fluid.data(name='img', shape=[-1, 3, 224, 224]) + x.stop_gradient = False + x = fluid.layers.prelu(x, mode="channel") + conv = ConvBNLayer( + num_channels=3, + num_filters=3, + filter_size=1, + act='relu', + use_cudnn=True) + y = conv(x) + x + + loss = fluid.layers.reduce_sum(y) + + sgd = fluid.optimizer.SGD(learning_rate=0.01) + sgd.minimize(loss) + + return loss, main, startup, conv._conv.weight + + +class TestInplaceAddto(unittest.TestCase): + def test_result(self): + def run_program(enable_addto): + np.random.seed(10) + paddle.manual_seed(10) + paddle.framework.random._manual_program_seed(10) + if fluid.core.is_compiled_with_cuda(): + fluid.set_flags({"FLAGS_cudnn_deterministic": True}) + fluid.set_flags({"FLAGS_max_inplace_grad_add": 2}) + loss, main, startup, w = create_program() + place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + exe = fluid.Executor(place) + + strategy = fluid.BuildStrategy() + strategy.enable_addto = enable_addto + compiled = 
fluid.CompiledProgram(main).with_data_parallel( + loss_name=loss.name, build_strategy=strategy) + + exe.run(startup) + img = np.random.uniform(-128, 128, + [8, 3, 224, 224]).astype(np.float32) + for i in range(2): + res = exe.run(compiled, + feed={'img': img}, + fetch_list=[loss.name, w.name]) + return res + + res1, w1 = run_program(True) + res2, w2 = run_program(False) + print(res1, res2) + self.assertTrue(np.array_equal(res1, res2)) + + +if __name__ == "__main__": + unittest.main() -- GitLab From 37f7414fd854590f1b36a8fdd1d0d0ebea4276cc Mon Sep 17 00:00:00 2001 From: tianshuo78520a <707759223@qq.com> Date: Mon, 21 Sep 2020 18:27:48 +0800 Subject: [PATCH 020/117] Optimization error message ;test=document_fix (#27424) --- tools/check_api_approvals.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/check_api_approvals.sh b/tools/check_api_approvals.sh index b787ae62501..943b8c01e8c 100644 --- a/tools/check_api_approvals.sh +++ b/tools/check_api_approvals.sh @@ -39,9 +39,9 @@ fi api_spec_diff=`python ${PADDLE_ROOT}/tools/check_api_source_without_core_ops.py ${PADDLE_ROOT}/paddle/fluid/API_DEV.source.md5 ${PADDLE_ROOT}/paddle/fluid/API_PR.source.md5` if [ "$api_spec_diff" != "" ]; then + echo_line="${echo_line}Related APIs: ${api_spec_diff}\n" echo_line="You must have one RD (zhiqiu (Recommend) or phlrain) approval for the api change for the opreator-related api without 'core.ops'.\n" echo_line="${echo_line}For more details, please click [https://github.com/PaddlePaddle/Paddle/wiki/paddle_api_development_manual.md]\n" - echo_line="${echo_line}Related APIs: ${api_spec_diff}\n" check_approval 1 6888866 43953930 fi -- GitLab From d93661942ed69527ea53259a4b6e65e8aef3cbea Mon Sep 17 00:00:00 2001 From: Shang Zhizhou Date: Mon, 21 Sep 2020 19:10:04 +0800 Subject: [PATCH 021/117] fix bug sequececonv_eltadd_relu_fuse_pass (#27404) * fix bug sequececonv_eltadd_relu_fuse_pass, output error when sequence_conv's padding_start > 0 * fix 
seqconv_eltadd_relu_fuse_pass unitest error --- .../ir/seqconv_eltadd_relu_fuse_pass.cc | 7 + .../fused/fusion_seqconv_eltadd_relu_op.cc | 18 ++- .../test_seqconv_eltadd_relu_fuse_pass.py | 140 ++++++++++++++++++ 3 files changed, 159 insertions(+), 6 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_seqconv_eltadd_relu_fuse_pass.py diff --git a/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc b/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc index 1485a84d001..75ab04f1b91 100644 --- a/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc @@ -16,6 +16,7 @@ #include #include #include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/op_version_registry.h" namespace paddle { namespace framework { @@ -98,3 +99,9 @@ void SeqConvEltAddReluFusePass::ApplyImpl(ir::Graph* graph) const { REGISTER_PASS(seqconv_eltadd_relu_fuse_pass, paddle::framework::ir::SeqConvEltAddReluFusePass); +REGISTER_PASS_CAPABILITY(seqconv_eltadd_relu_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("sequence_conv", 0) + .EQ("elementwise_add", 0) + .EQ("relu", 0)); diff --git a/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc b/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc index a6c9a137b54..c5a291f10b2 100644 --- a/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc +++ b/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc @@ -192,6 +192,9 @@ class FusionSeqConvEltAddReluKernel : public framework::OpKernel { copy_size += src_mat_w_sz; } // fill data + if (context_start > 0) { + src_data += context_start * src_mat_w; + } for (int j = 0; j < seq_len - up_pad - down_pad; ++j) { std::memcpy(dst_data, src_data, copy_size); dst_data += col_mat_w; @@ -201,18 +204,15 @@ class FusionSeqConvEltAddReluKernel : public framework::OpKernel { std::memset(dst_data, 0, 
down_pad * col_mat_w_sz); copy_size -= src_mat_w_sz; for (int j = 0; j < down_pad; ++j) { + if (copy_size < 0) { + copy_size = 0; + } std::memcpy(dst_data, src_data, copy_size); dst_data += col_mat_w; src_data += src_mat_w; copy_size -= src_mat_w_sz; } } else { - PADDLE_ENFORCE_GE(context_length, up_pad + down_pad + 1, - platform::errors::InvalidArgument( - "context length must be bigger or equal than " - "up_pad + down_pad + 1, but received context " - "length is: %d, up_pad is: %d, down_pad is: %d.", - context_length, up_pad, down_pad)); std::memset(dst_data, 0, seq_len * col_mat_w_sz); dst_data = dst_data + up_pad * src_mat_w; int zero_sz = up_pad * src_mat_w_sz; @@ -226,9 +226,15 @@ class FusionSeqConvEltAddReluKernel : public framework::OpKernel { // from bottom dst_data = col_data + ed * col_mat_w; src_data = x_data + st * src_mat_w; + if (context_start > 0) { + src_data += context_start * src_mat_w; + } zero_sz = down_pad * src_mat_w_sz; for (int j = 1; j <= std::min(down_pad, seq_len); ++j) { int copy_size = std::min(cur_src_sz, col_mat_w_sz - zero_sz); + if (copy_size < 0) { + copy_size = 0; + } std::memcpy(dst_data - (zero_sz + copy_size) / sizeof(T), src_data + std::max(seq_len - j - up_pad, 0) * src_mat_w, copy_size); diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_seqconv_eltadd_relu_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_seqconv_eltadd_relu_fuse_pass.py new file mode 100644 index 00000000000..eadda5ba06a --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_seqconv_eltadd_relu_fuse_pass.py @@ -0,0 +1,140 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import AnalysisConfig +from paddle.fluid.core import PassVersionChecker + + +class SeqconvEltaddReluFusePassTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data(name="data", shape=[100, 100], dtype="float32") + param_attr = fluid.ParamAttr( + initializer=fluid.initializer.Xavier(uniform=False), + learning_rate=0.001) + conv_out = fluid.layers.sequence_conv( + input=data, + num_filters=16, + filter_size=4, + padding_start=0, + act="relu", + bias_attr=param_attr) + + np_data = np.random.random((80, 100)).astype('float32') + x_lod_tensor = fluid.create_lod_tensor(np_data, [[10, 20, 30, 20]], + fluid.CPUPlace()) + self.feeds = {"data": x_lod_tensor} + self.fetch_list = [conv_out] + self.enable_mkldnn = True + + def test_check_output(self): + self.check_output() + self.assertTrue( + PassVersionChecker.IsCompatible('seqconv_eltadd_relu_fuse_pass')) + + +class SeqconvEltaddReluFusePassTestPaddingStartPositive(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data(name="data", shape=[-1, 4], dtype="float32") + param_attr = fluid.ParamAttr( + initializer=fluid.initializer.Xavier(uniform=False), + learning_rate=0.001) + conv_out = fluid.layers.sequence_conv( + input=data, + num_filters=16, + 
filter_size=3, + padding_start=2, + act="relu", + bias_attr=param_attr) + + np_data = np.array([[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3], + [4, 4, 4, 4], [5, 5, 5, 5], [6, 6, 6, 6], + [7, 7, 7, 7]]).astype('float32') + x_lod_tensor = fluid.create_lod_tensor(np_data, [[5, 2]], + fluid.CPUPlace()) + self.feeds = {"data": x_lod_tensor} + self.fetch_list = [conv_out] + self.enable_mkldnn = True + + def test_check_output(self): + self.check_output() + self.assertTrue( + PassVersionChecker.IsCompatible('seqconv_eltadd_relu_fuse_pass')) + + +class SeqconvEltaddReluFusePassTestPaddingStartNegative(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data(name="data", shape=[100, 100], dtype="float32") + param_attr = fluid.ParamAttr( + initializer=fluid.initializer.Xavier(uniform=False), + learning_rate=0.001) + conv_out = fluid.layers.sequence_conv( + input=data, + num_filters=16, + filter_size=4, + padding_start=-1, + act="relu", + bias_attr=param_attr) + + np_data = np.random.random((80, 100)).astype('float32') + x_lod_tensor = fluid.create_lod_tensor(np_data, [[10, 20, 30, 20]], + fluid.CPUPlace()) + self.feeds = {"data": x_lod_tensor} + self.fetch_list = [conv_out] + self.enable_mkldnn = True + + def test_check_output(self): + self.check_output() + self.assertTrue( + PassVersionChecker.IsCompatible('seqconv_eltadd_relu_fuse_pass')) + + +class SeqconvEltaddReluFusePassTestPaddingStartNone(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data(name="data", shape=[100, 100], dtype="float32") + param_attr = fluid.ParamAttr( + initializer=fluid.initializer.Xavier(uniform=False), + learning_rate=0.001) + conv_out = fluid.layers.sequence_conv( + input=data, + num_filters=16, + filter_size=4, + act="relu", + bias_attr=param_attr) + + np_data = np.random.random((80, 100)).astype('float32') + x_lod_tensor = 
fluid.create_lod_tensor(np_data, [[10, 20, 30, 20]], + fluid.CPUPlace()) + self.feeds = {"data": x_lod_tensor} + self.fetch_list = [conv_out] + self.enable_mkldnn = True + + def test_check_output(self): + self.check_output() + self.assertTrue( + PassVersionChecker.IsCompatible('seqconv_eltadd_relu_fuse_pass')) + + +if __name__ == "__main__": + unittest.main() -- GitLab From e217e965a9fd3bcb022381ce81431b48f060d5c8 Mon Sep 17 00:00:00 2001 From: Double_V Date: Mon, 21 Sep 2020 19:14:37 +0800 Subject: [PATCH 022/117] fix pool bug (#27366) --- python/paddle/nn/functional/pooling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index 042625a3dbd..1eb9167d035 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -713,7 +713,7 @@ def max_pool2d(x, 'data_format', data_format) return output - op_type = 'max_pool2d_with_index' if data_format == "NCHW" else "max_pool2d" + op_type = 'max_pool2d_with_index' if data_format == "NCHW" else "pool2d" helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) @@ -839,7 +839,7 @@ def max_pool3d(x, 'data_format', data_format) return output - op_type = "max_pool3d_with_index" if data_format == "NCDHW" else "max_pool3d" + op_type = "max_pool3d_with_index" if data_format == "NCDHW" else "pool3d" helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) -- GitLab From fc61efd736a04b97c0a8ce4f75a6ddfc577a9663 Mon Sep 17 00:00:00 2001 From: danleifeng <52735331+danleifeng@users.noreply.github.com> Date: Mon, 21 Sep 2020 20:03:37 +0800 Subject: [PATCH 023/117] fix port env bug(int);test=develop (#27405) --- python/paddle/distributed/fleet/launch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/python/paddle/distributed/fleet/launch.py b/python/paddle/distributed/fleet/launch.py index 4b629bc35ce..17fa0a0c7c3 100644 --- a/python/paddle/distributed/fleet/launch.py +++ b/python/paddle/distributed/fleet/launch.py @@ -156,7 +156,7 @@ def get_cluster_from_args(args, gpus): else: start_port = 6070 if os.environ.get('FLAGS_START_PORT') is not None: - start_port = os.environ.get('FLAGS_START_PORT') + start_port = int(os.environ.get('FLAGS_START_PORT')) free_ports = [x for x in range(start_port, start_port + len(gpus))] -- GitLab From 4bd7aa25668b06a00e528900a9f3485e785beb0f Mon Sep 17 00:00:00 2001 From: Kaipeng Deng Date: Mon, 21 Sep 2020 20:09:09 +0800 Subject: [PATCH 024/117] use paddle.get_default_dtype in vision datasets. test=develop (#27426) --- python/paddle/vision/datasets/cifar.py | 5 ++++- python/paddle/vision/datasets/flowers.py | 5 ++++- python/paddle/vision/datasets/folder.py | 3 +++ python/paddle/vision/datasets/mnist.py | 5 ++++- python/paddle/vision/datasets/voc2012.py | 5 ++++- 5 files changed, 19 insertions(+), 4 deletions(-) diff --git a/python/paddle/vision/datasets/cifar.py b/python/paddle/vision/datasets/cifar.py index 631892ee4dc..c531f3d0e4e 100644 --- a/python/paddle/vision/datasets/cifar.py +++ b/python/paddle/vision/datasets/cifar.py @@ -19,6 +19,7 @@ import numpy as np import six from six.moves import cPickle as pickle +import paddle from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download @@ -113,6 +114,8 @@ class Cifar10(Dataset): # read dataset into memory self._load_data() + self.dtype = paddle.get_default_dtype() + def _init_url_md5_flag(self): self.data_url = CIFAR10_URL self.data_md5 = CIFAR10_MD5 @@ -142,7 +145,7 @@ class Cifar10(Dataset): image = np.reshape(image, [3, 32, 32]) if self.transform is not None: image = self.transform(image) - return image, label + return image.astype(self.dtype), np.array(label).astype('int64') def __len__(self): return len(self.data) diff --git 
a/python/paddle/vision/datasets/flowers.py b/python/paddle/vision/datasets/flowers.py index 1c0f41123e2..2251333fd8d 100644 --- a/python/paddle/vision/datasets/flowers.py +++ b/python/paddle/vision/datasets/flowers.py @@ -21,6 +21,7 @@ import numpy as np import scipy.io as scio from PIL import Image +import paddle from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download @@ -104,6 +105,8 @@ class Flowers(Dataset): # read dataset into memory self._load_anno() + self.dtype = paddle.get_default_dtype() + def _load_anno(self): self.name2mem = {} self.data_tar = tarfile.open(self.data_file) @@ -124,7 +127,7 @@ class Flowers(Dataset): if self.transform is not None: image = self.transform(image) - return image, label.astype('int64') + return image.astype(self.dtype), label.astype('int64') def __len__(self): return len(self.indexes) diff --git a/python/paddle/vision/datasets/folder.py b/python/paddle/vision/datasets/folder.py index 8a3053abefc..19d913504bd 100644 --- a/python/paddle/vision/datasets/folder.py +++ b/python/paddle/vision/datasets/folder.py @@ -15,6 +15,7 @@ import os import sys +import paddle from paddle.io import Dataset from paddle.utils import try_import @@ -143,6 +144,8 @@ class DatasetFolder(Dataset): self.samples = samples self.targets = [s[1] for s in samples] + self.dtype = paddle.get_default_dtype() + def _find_classes(self, dir): """ Finds the class folders in a dataset. 
diff --git a/python/paddle/vision/datasets/mnist.py b/python/paddle/vision/datasets/mnist.py index 597d4046441..16c39e56ef0 100644 --- a/python/paddle/vision/datasets/mnist.py +++ b/python/paddle/vision/datasets/mnist.py @@ -19,6 +19,7 @@ import gzip import struct import numpy as np +import paddle from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download @@ -95,6 +96,8 @@ class MNIST(Dataset): # read dataset into memory self._parse_dataset() + self.dtype = paddle.get_default_dtype() + def _parse_dataset(self, buffer_size=100): self.images = [] self.labels = [] @@ -145,7 +148,7 @@ class MNIST(Dataset): image = np.reshape(image, [1, 28, 28]) if self.transform is not None: image = self.transform(image) - return image, label + return image.astype(self.dtype), label.astype('int64') def __len__(self): return len(self.labels) diff --git a/python/paddle/vision/datasets/voc2012.py b/python/paddle/vision/datasets/voc2012.py index ae14ea30163..5fc9d7c3815 100644 --- a/python/paddle/vision/datasets/voc2012.py +++ b/python/paddle/vision/datasets/voc2012.py @@ -19,6 +19,7 @@ import tarfile import numpy as np from PIL import Image +import paddle from paddle.io import Dataset from paddle.dataset.common import _check_exists_and_download @@ -96,6 +97,8 @@ class VOC2012(Dataset): # read dataset into memory self._load_anno() + self.dtype = paddle.get_default_dtype() + def _load_anno(self): self.name2mem = {} self.data_tar = tarfile.open(self.data_file) @@ -127,7 +130,7 @@ class VOC2012(Dataset): label = np.array(label) if self.transform is not None: data = self.transform(data) - return data, label + return data.astype(self.dtype), label.astype(self.dtype) def __len__(self): return len(self.data) -- GitLab From 1d3b27cae8a7d88db80358a2810279874835fc68 Mon Sep 17 00:00:00 2001 From: ceci3 Date: Mon, 21 Sep 2020 20:22:41 +0800 Subject: [PATCH 025/117] add double grad compute for batch norm (#27296) * add double grad compute for batch norm,test=develop * 
fix unittest, test=develop * remove unuse tensor,test=develop * add format,test=develop * update, test=develop --- paddle/fluid/operators/batch_norm_op.cc | 405 ++++++++++++++- paddle/fluid/operators/batch_norm_op.cu | 44 ++ paddle/fluid/operators/batch_norm_op.h | 61 +++ paddle/fluid/operators/instance_norm_op.cc | 8 +- paddle/fluid/operators/norm_utils.cu.h | 486 ++++++++++++++++++ python/paddle/fluid/layers/nn.py | 4 +- .../unittests/test_imperative_double_grad.py | 2 +- .../tests/unittests/test_norm_nn_grad.py | 62 +++ 8 files changed, 1066 insertions(+), 6 deletions(-) create mode 100644 paddle/fluid/operators/norm_utils.cu.h diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index c92f72e653d..dcfe8bb1bb4 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -831,6 +831,401 @@ void BatchNormGradMaker::Apply(GradOpPtr op) const { op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias")); } +template +void BatchNormDoubleGradMaker::Apply(GradOpPtr op) const { + op->SetType("batch_norm_grad_grad"); + op->SetInput("X", this->Input("X")); + op->SetInput("Scale", this->Input("Scale")); + op->SetInput("SavedMean", this->Input("SavedMean")); + op->SetInput("SavedVariance", this->Input("SavedVariance")); + if (BOOST_GET_CONST(bool, this->GetAttr("use_global_stats"))) { + op->SetInput("Variance", this->Input("Variance")); + } + op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X"))); + op->SetInput("DDScale", this->OutputGrad(framework::GradVarName("Scale"))); + op->SetInput("DDBias", this->OutputGrad(framework::GradVarName("Bias"))); + op->SetInput("DY", this->Input(framework::GradVarName("Y"))); + + op->SetAttrMap(this->Attrs()); + op->SetOutput("DX", this->InputGrad("X")); + op->SetOutput("DScale", this->InputGrad("Scale")); + op->SetOutput("DDY", this->InputGrad(framework::GradVarName("Y"))); +} + +void BatchNormDoubleGradOp::InferShape( + 
framework::InferShapeContext *ctx) const { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "BatchNormDoubleGrad"); + OP_INOUT_CHECK(ctx->HasInput("Scale"), "Input", "Scale", + "BatchNormDoubleGrad"); + OP_INOUT_CHECK(ctx->HasInput("SavedMean"), "Input", "SavedMean", + "BatchNormDoubleGrad"); + OP_INOUT_CHECK(ctx->HasInput("SavedVariance"), "Input", "SavedVariance", + "BatchNormDoubleGrad"); + + const bool use_global_stats = ctx->Attrs().Get("use_global_stats"); + if (use_global_stats) { + OP_INOUT_CHECK(ctx->HasInput("Variance"), "Input", "VarianceOut", + "BatchNormDoubleGrad"); + } + + OP_INOUT_CHECK(ctx->HasInput("DDX"), "Input", "DDX", "BatchNormDoubleGrad"); + OP_INOUT_CHECK(ctx->HasInput("DY"), "Input", "DY", "BatchNormDoubleGrad"); + + // check output + OP_INOUT_CHECK(ctx->HasOutput("DX"), "Output", "DX", "BatchNormDoubleGrad"); + + const auto x_dims = ctx->GetInputDim("X"); + const int C = x_dims[1]; + if (ctx->HasOutput("DX")) { + ctx->SetOutputDim("DX", x_dims); + } + if (ctx->HasOutput("DScale")) { + ctx->SetOutputDim("DScale", {C}); + } + if (ctx->HasOutput("DDY")) { + ctx->ShareDim("X", "DDY"); + } +} + +framework::OpKernelType BatchNormDoubleGradOp::GetExpectedKernelType( + const framework::ExecutionContext &ctx) const { + const auto *var = ctx.InputVar("DY"); + if (var == nullptr) { + PADDLE_THROW( + platform::errors::NotFound("cannot find gradient variable of Y")); + } + const Tensor *t = nullptr; + if (var->IsType()) { + t = &var->Get(); + } else if (var->IsType()) { + t = &var->Get(); + } + if (t == nullptr) { + PADDLE_THROW( + platform::errors::InvalidArgument("gradient variable of Y is empty")); + } + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace()); +} + +template +class BatchNormDoubleGradKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + const auto *X = ctx.Input("X"); + const auto *Scale = ctx.Input("Scale"); + const 
auto *dY = ctx.Input("DY"); + const auto *Saved_mean = ctx.Input("SavedMean"); + const auto *Saved_variance = ctx.Input("SavedVariance"); + const float epsilon = ctx.Attr("epsilon"); + const bool use_global_stats = ctx.Attr("use_global_stats"); + const bool is_test = ctx.Attr("is_test"); + + PADDLE_ENFORCE_EQ( + is_test, false, + platform::errors::InvalidArgument( + "`is_test = True` CANNOT be used in train program. If " + "you want to use global status in pre_train model, " + "please set `use_global_stats = True`")); + + const std::string data_layout_str = ctx.Attr("data_layout"); + const DataLayout data_layout = + framework::StringToDataLayout(data_layout_str); + + const auto *ddX = ctx.Input("DDX"); + const auto *ddScale = ctx.Input("DDScale"); + const auto *ddBias = ctx.Input("DDBias"); + + auto *dX = ctx.Output("DX"); + auto *dScale = ctx.Output("DScale"); + auto *ddY = ctx.Output("DDY"); + dX->mutable_data(ctx.GetPlace()); + ddY->mutable_data(ctx.GetPlace()); + + auto &dev_ctx = ctx.template device_context(); + + const auto &x_dims = X->dims(); + const int C = + (data_layout == DataLayout::kNCHW ? 
x_dims[1] + : x_dims[x_dims.size() - 1]); + const int sample_size = X->numel() / C; + math::SetConstant set_constant; + + const T *mean_data = Saved_mean->data(); + const T *inv_var_data = Saved_variance->data(); + + Tensor inv_var_tensor; + if (use_global_stats) { + const auto *running_variance = ctx.Input("Variance"); + inv_var_tensor.Resize({C}); + + T *running_inv_var_data = inv_var_tensor.mutable_data(ctx.GetPlace()); + EigenVectorArrayMap inv_var_tmp(running_inv_var_data, C); + ConstEigenVectorArrayMap var_arr(running_variance->data(), C); + + inv_var_tmp = (var_arr + epsilon).sqrt().inverse(); + inv_var_data = running_inv_var_data; + } + + // transpose NCHW -> NHWC for easy calculate + Tensor transformed_x(X->type()); + Tensor transformed_dy(dY->type()); + Tensor transformed_ddx(ddX->type()); + + Tensor transformed_dx(dX->type()); + Tensor transformed_ddy(ddY->type()); + if (data_layout == DataLayout::kNCHW && x_dims.size() > 2) { + VLOG(3) << "Transform batchnorm output from NCHW to NHWC"; + // Input Tensor + ResizeToChannelLast(ctx, X, + &transformed_x); + TransToChannelLast(ctx, X, &transformed_x); + ResizeToChannelLast(ctx, dY, + &transformed_dy); + TransToChannelLast(ctx, dY, + &transformed_dy); + ResizeToChannelLast(ctx, ddX, + &transformed_ddx); + TransToChannelLast(ctx, ddX, + &transformed_ddx); + // Output Tensor + ResizeToChannelLast(ctx, dX, + &transformed_dx); + ResizeToChannelLast(ctx, ddY, + &transformed_ddy); + } else { + transformed_x.ShareDataWith(*X); + transformed_dy.ShareDataWith(*dY); + transformed_ddx.ShareDataWith(*ddX); + + transformed_dx.ShareDataWith(*dX); + transformed_ddy.ShareDataWith(*ddY); + } + + ConstEigenArrayMap x_arr(transformed_x.data(), C, sample_size); + ConstEigenVectorArrayMap mean_arr(mean_data, C); + ConstEigenVectorArrayMap inv_var_arr(inv_var_data, C); + + Tensor mean_tile; + mean_tile.Resize({C, sample_size}); + mean_tile.mutable_data(ctx.GetPlace()); + EigenArrayMap 
mean_tile_data(mean_tile.mutable_data(ctx.GetPlace()), + C, sample_size); + + Tensor inv_var_tile; + inv_var_tile.Resize({C, sample_size}); + inv_var_tile.mutable_data(ctx.GetPlace()); + EigenArrayMap inv_var_tile_data( + inv_var_tile.mutable_data(ctx.GetPlace()), C, sample_size); + + mean_tile_data = mean_arr.replicate(1, sample_size); + inv_var_tile_data = inv_var_arr.replicate(1, sample_size); + + Tensor Scale_data; + if (!Scale) { + Scale_data.mutable_data({C}, ctx.GetPlace()); + set_constant(dev_ctx, &Scale_data, static_cast(1)); + } + ConstEigenVectorArrayMap scale_arr( + Scale ? Scale->data() : Scale_data.data(), C); + + Tensor scale_tile; + scale_tile.Resize({C, sample_size}); + scale_tile.mutable_data(ctx.GetPlace()); + EigenArrayMap scale_tile_data(scale_tile.mutable_data(ctx.GetPlace()), + C, sample_size); + scale_tile_data = scale_arr.replicate(1, sample_size); + + ConstEigenArrayMap dy_arr(transformed_dy.data(), C, sample_size); + ConstEigenArrayMap ddx_arr(transformed_ddx.data(), C, sample_size); + + Tensor x_sub_mean_mul_invstd; + x_sub_mean_mul_invstd.Resize({C, sample_size}); + x_sub_mean_mul_invstd.mutable_data(ctx.GetPlace()); + EigenArrayMap x_sub_mean_mul_invstd_arr( + x_sub_mean_mul_invstd.mutable_data(ctx.GetPlace()), C, sample_size); + x_sub_mean_mul_invstd_arr = (x_arr - mean_tile_data) * inv_var_tile_data; + + if (dX) { + dX->mutable_data(ctx.GetPlace()); + EigenArrayMap dx_arr(transformed_dx.mutable_data(ctx.GetPlace()), C, + sample_size); + dx_arr.setZero(); + if (use_global_stats) { + // math: dx = (ddscale * dy) * inv_var + if (ddScale) { + ConstEigenVectorArrayMap ddscale_arr(ddScale->data(), C); + Tensor ddscale_tile; + ddscale_tile.Resize({C, sample_size}); + EigenArrayMap ddscale_tile_data( + ddscale_tile.mutable_data(ctx.GetPlace()), C, sample_size); + ddscale_tile_data = ddscale_arr.replicate(1, sample_size); + + dx_arr = dy_arr * ddscale_tile_data * inv_var_tile_data; + } + } else { + // math: dx = scale * ((x - mean) * inv_var 
/ NxHxW * (np.mean(ddx, + // axis=(n,h,w)) * + // np.sum(dy, axis=(n,h,w)) - + // np.sum(dy * ddx, axis=(n,h,w)) + 3 * np.mean(dy * (x - + // mean), + // axis=(n,h,w)) * inv_var.pow(2) * + // np.sum(ddx * (x - mean), axis=(n,h,w))) + inv_var.pow(3) / + // NxHxW * + // np.sum(ddx * (x - mean)) * + // (np.mean(dy, axis=(n,h,w)) - dy) + inv_var.pow(3) / NxHxW * + // np.sum(dy, + // axis=(n,h,w)) * (x - mean) * + // (np.mean(ddx, axis=(n,h,w)) - ddx) + ddr * (dy * inv_var - + // inv_var + // * + // np.mean(dy, axis=(n,h,w)) - + // inv_var.pow(3) * (x - mean) * np.mean(dy * (x - mean), + // axis=(n,h,w)))) + + if (ddX) { + dx_arr += + (x_sub_mean_mul_invstd_arr * inv_var_tile_data * + inv_var_tile_data / sample_size) + .colwise() * + (ddx_arr.rowwise().sum() * dy_arr.rowwise().sum() / sample_size - + (dy_arr * ddx_arr).rowwise().sum() + + 3. * (dy_arr * x_sub_mean_mul_invstd_arr).rowwise().sum() * + (ddx_arr * x_sub_mean_mul_invstd_arr).rowwise().sum() / + sample_size); + + dx_arr += (inv_var_tile_data * inv_var_tile_data).colwise() * + (ddx_arr * x_sub_mean_mul_invstd_arr).rowwise().sum() / + sample_size * + (dy_arr.rowwise().sum() / sample_size - dy_arr); + + dx_arr += (inv_var_tile_data * inv_var_tile_data).colwise() * + (dy_arr * x_sub_mean_mul_invstd_arr).rowwise().sum() / + sample_size * + (ddx_arr.rowwise().sum() / sample_size - ddx_arr); + + dx_arr = scale_tile_data * dx_arr; + } + if (ddScale) { + ConstEigenVectorArrayMap ddscale_arr(ddScale->data(), C); + Tensor ddscale_tile; + ddscale_tile.Resize({C, sample_size}); + EigenArrayMap ddscale_tile_data( + ddscale_tile.mutable_data(ctx.GetPlace()), C, sample_size); + ddscale_tile_data = ddscale_arr.replicate(1, sample_size); + + dx_arr += (dy_arr * inv_var_tile_data - + (dy_arr.rowwise().sum().replicate(1, sample_size) / + sample_size) * + inv_var_tile_data - + x_sub_mean_mul_invstd_arr * inv_var_tile_data * + (dy_arr * x_sub_mean_mul_invstd_arr) + .rowwise() + .sum() + .replicate(1, sample_size) / + sample_size) 
* + ddscale_tile_data; + } + } + if (data_layout == DataLayout::kNCHW) { + VLOG(3) << "Transform batchnorm output from NHWC to NCHW"; + TransToChannelFirst( + ctx, &transformed_dx, dX); + } + } + if (dScale) { + dScale->mutable_data(ctx.GetPlace()); + EigenVectorArrayMap dscale_arr(dScale->mutable_data(ctx.GetPlace()), + C); + dscale_arr.setZero(); + if (use_global_stats) { + // math: dscale = np.sum(ddx * dy, axis=(n,h,w)) * inv_var + if (ddX) { + dscale_arr = (ddx_arr * dy_arr * inv_var_tile_data).rowwise().sum(); + } + } else { + // math: dscale = inv_var * (dy - np.mean(dy, axis=(n,h,w) - (x-mean) * + // inv_var.pow(2) * np.mean(dy * (x-mean), axis=(n,h,w)))) * + // ddx + if (ddX) { + Tensor first_grad; + first_grad.Resize({C, sample_size}); + EigenArrayMap first_grad_arr( + first_grad.mutable_data(ctx.GetPlace()), C, sample_size); + first_grad_arr.setZero(); + + first_grad_arr += + inv_var_tile_data * + (dy_arr - + dy_arr.rowwise().sum().replicate(1, sample_size) / sample_size - + x_sub_mean_mul_invstd_arr * + (dy_arr * x_sub_mean_mul_invstd_arr) + .rowwise() + .sum() + .replicate(1, sample_size) / + sample_size); + dscale_arr = (first_grad_arr * ddx_arr).rowwise().sum(); + } + } + } + + if (ddY) { + ddY->mutable_data(ctx.GetPlace()); + EigenArrayMap ddy_arr(transformed_ddy.mutable_data(ctx.GetPlace()), + C, sample_size); + ddy_arr.setZero(); + if (use_global_stats) { + // math: ddy = r * ddx * inv_var + if (ddX) { + ddy_arr = scale_tile_data * ddx_arr * inv_var_tile_data; + } + } else { + // math: ddy = (x - mean) * inv_var * ddscale + ddbias + + // scale * inv_var * (ddx - (x - mean) * inv_var.pow(2) * + // np.mean(ddx * (x - mean), axis=(n,h,w))) + if (ddX) { + ddy_arr += + scale_tile_data * inv_var_tile_data * + (ddx_arr - + ddx_arr.rowwise().sum().replicate(1, sample_size) / sample_size - + x_sub_mean_mul_invstd_arr * + (ddx_arr * x_sub_mean_mul_invstd_arr) + .rowwise() + .sum() + .replicate(1, sample_size) / + sample_size); + } + if (ddScale && ddBias) { 
+ ConstEigenVectorArrayMap ddscale_arr(ddScale->data(), C); + Tensor ddscale_tile; + ddscale_tile.Resize({C, sample_size}); + EigenArrayMap ddscale_tile_data( + ddscale_tile.mutable_data(ctx.GetPlace()), C, sample_size); + ddscale_tile_data = ddscale_arr.replicate(1, sample_size); + + ConstEigenVectorArrayMap ddbias_arr(ddBias->data(), C); + Tensor ddbias_tile; + ddbias_tile.Resize({C, sample_size}); + EigenArrayMap ddbias_tile_data( + ddbias_tile.mutable_data(ctx.GetPlace()), C, sample_size); + ddbias_tile_data = ddbias_arr.replicate(1, sample_size); + + ddy_arr += x_sub_mean_mul_invstd_arr * ddscale_tile_data; + ddy_arr += ddbias_tile_data; + } + } + if (data_layout == DataLayout::kNCHW) { + VLOG(3) << "Transform batchnorm output from NHWC to NCHW"; + TransToChannelFirst( + ctx, &transformed_ddy, ddY); + } + } + } +}; + +DECLARE_INPLACE_OP_INFERER(BatchNormDoubleGradOpInplaceInferer, {"DY", "DDY"}); + } // namespace operators } // namespace paddle @@ -839,7 +1234,11 @@ REGISTER_OPERATOR(batch_norm, ops::BatchNormOp, ops::BatchNormOpMaker, ops::BatchNormOpInferVarType, ops::BatchNormGradMaker, ops::BatchNormGradMaker); -REGISTER_OPERATOR(batch_norm_grad, ops::BatchNormGradOp); +REGISTER_OPERATOR(batch_norm_grad, ops::BatchNormGradOp, + ops::BatchNormDoubleGradMaker, + ops::BatchNormDoubleGradMaker); +REGISTER_OPERATOR(batch_norm_grad_grad, ops::BatchNormDoubleGradOp, + ops::BatchNormDoubleGradOpInplaceInferer); REGISTER_OP_CPU_KERNEL( batch_norm, ops::BatchNormKernel, @@ -848,3 +1247,7 @@ REGISTER_OP_CPU_KERNEL( batch_norm_grad, ops::BatchNormGradKernel, ops::BatchNormGradKernel); +REGISTER_OP_CPU_KERNEL( + batch_norm_grad_grad, + ops::BatchNormDoubleGradKernel, + ops::BatchNormDoubleGradKernel); diff --git a/paddle/fluid/operators/batch_norm_op.cu b/paddle/fluid/operators/batch_norm_op.cu index be834772679..2d5b395ac68 100644 --- a/paddle/fluid/operators/batch_norm_op.cu +++ b/paddle/fluid/operators/batch_norm_op.cu @@ -20,6 +20,7 @@ limitations under the 
License. */ #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/operators/batch_norm_op.h" #include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/operators/norm_utils.cu.h" #include "paddle/fluid/platform/cudnn_helper.h" #include "paddle/fluid/platform/float16.h" @@ -840,6 +841,45 @@ class BatchNormGradKernel } }; +template +class BatchNormDoubleGradKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + const auto *X = ctx.Input("X"); + const auto *Scale = ctx.Input("Scale"); + const auto *dY = ctx.Input("DY"); + const auto *Saved_mean = ctx.Input("SavedMean"); + const auto *Saved_variance = ctx.Input("SavedVariance"); + const double epsilon = static_cast(ctx.Attr("epsilon")); + const bool use_global_stats = ctx.Attr("use_global_stats"); + const bool is_test = ctx.Attr("is_test"); + + PADDLE_ENFORCE_EQ( + is_test, false, + platform::errors::InvalidArgument( + "`is_test = True` CANNOT be used in train program. 
If " + "you want to use global status in pre_train model, " + "please set `use_global_stats = True`")); + + const std::string data_layout_str = ctx.Attr("data_layout"); + const DataLayout data_layout = + framework::StringToDataLayout(data_layout_str); + + const auto *ddX = ctx.Input("DDX"); + const auto *ddScale = ctx.Input("DDScale"); + const auto *ddBias = ctx.Input("DDBias"); + + auto *dX = ctx.Output("DX"); + auto *dScale = ctx.Output("DScale"); + auto *ddY = ctx.Output("DDY"); + + NormDoubleGradFunctor( + ctx, data_layout, X, Scale, dY, Saved_mean, Saved_variance, epsilon, + use_global_stats, ddX, ddScale, ddBias, dX, dScale, ddY); + } +}; + } // namespace operators } // namespace paddle @@ -853,3 +893,7 @@ REGISTER_OP_CUDA_KERNEL( batch_norm_grad, ops::BatchNormGradKernel, ops::BatchNormGradKernel, ops::BatchNormGradKernel); +REGISTER_OP_CUDA_KERNEL( + batch_norm_grad_grad, + ops::BatchNormDoubleGradKernel, + ops::BatchNormDoubleGradKernel); diff --git a/paddle/fluid/operators/batch_norm_op.h b/paddle/fluid/operators/batch_norm_op.h index 9f844b7c078..1440b74290c 100644 --- a/paddle/fluid/operators/batch_norm_op.h +++ b/paddle/fluid/operators/batch_norm_op.h @@ -103,6 +103,42 @@ inline void TransToChannelFirst(const framework::ExecutionContext& context, } } +template +inline void ResizeToChannelLast(const framework::ExecutionContext& context, + const Tensor* input, + Tensor* transformed_input) { + int dim = input->dims().size() - 2; + if (dim == 3) { + transformed_input->Resize(input->dims()); + + auto in_dims_vec = framework::vectorize(input->dims()); + in_dims_vec[1] = input->dims()[2]; + in_dims_vec[2] = input->dims()[3]; + in_dims_vec[3] = input->dims()[4]; + in_dims_vec[4] = input->dims()[1]; + transformed_input->Resize(framework::make_ddim(in_dims_vec)); + transformed_input->mutable_data(context.GetPlace()); + + } else if (dim == 2) { + transformed_input->Resize(input->dims()); + + auto in_dims_vec = framework::vectorize(input->dims()); + in_dims_vec[1] 
= input->dims()[2]; + in_dims_vec[2] = input->dims()[3]; + in_dims_vec[3] = input->dims()[1]; + transformed_input->Resize(framework::make_ddim(in_dims_vec)); + transformed_input->mutable_data(context.GetPlace()); + } else if (dim == 1) { + transformed_input->Resize(input->dims()); + + auto in_dims_vec = framework::vectorize(input->dims()); + in_dims_vec[1] = input->dims()[2]; + in_dims_vec[2] = input->dims()[1]; + transformed_input->Resize(framework::make_ddim(in_dims_vec)); + transformed_input->mutable_data(context.GetPlace()); + } +} + template inline void TransToChannelLast(const framework::ExecutionContext& context, const Tensor* input, Tensor* transformed_input) { @@ -154,6 +190,16 @@ class BatchNormGradOp : public framework::OperatorWithKernel { const framework::OpKernelType& expected_kernel_type) const override; }; +class BatchNormDoubleGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override; + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override; +}; + class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override; @@ -168,6 +214,15 @@ class BatchNormGradMaker : public framework::SingleGradOpMaker { void Apply(GradOpPtr op) const override; }; +template +class BatchNormDoubleGradMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op) const override; +}; + class BatchNormOpInferVarType : public framework::PassInDtypeAndVarTypeToOutput { protected: @@ -190,5 +245,11 @@ class BatchNormGradKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override; }; +template +class BatchNormDoubleGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const 
override; +}; + } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/instance_norm_op.cc b/paddle/fluid/operators/instance_norm_op.cc index f72f7e8b85b..a5b270c1dfe 100644 --- a/paddle/fluid/operators/instance_norm_op.cc +++ b/paddle/fluid/operators/instance_norm_op.cc @@ -595,9 +595,13 @@ class InstanceNormDoubleGradKernel first_grad_arr += inv_var_tile_data * - (dy_arr - dy_arr.colwise().sum() / sample_size - + (dy_arr - + dy_arr.colwise().sum().replicate(sample_size, 1) / sample_size - x_sub_mean_mul_invstd_arr * - (dy_arr * x_sub_mean_mul_invstd_arr).colwise().sum() / + (dy_arr * x_sub_mean_mul_invstd_arr) + .colwise() + .sum() + .replicate(sample_size, 1) / sample_size); first_grad_arr = first_grad_arr * ddx_arr; for (int nc = 0; nc < NxC; ++nc) { diff --git a/paddle/fluid/operators/norm_utils.cu.h b/paddle/fluid/operators/norm_utils.cu.h new file mode 100644 index 00000000000..07333f1ae11 --- /dev/null +++ b/paddle/fluid/operators/norm_utils.cu.h @@ -0,0 +1,486 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once +#include +#include +#include +#include +#include "cub/cub.cuh" +#include "paddle/fluid/framework/data_layout.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/platform/cudnn_helper.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using DataLayout = framework::DataLayout; + +// math: dx = scale * ((x - mean) * inv_var / NxHxW * (np.mean(ddx, +// axis=(n,h,w)) * +// np.sum(dy, axis=(n,h,w)) - +// np.sum(dy * ddx, axis=(n,h,w)) + 3 * np.mean(dy * (x - +// mean), +// axis=(n,h,w)) * inv_var.pow(2) * +// np.sum(ddx * (x - mean), axis=(n,h,w))) + inv_var.pow(3) / +// NxHxW * +// np.sum(ddx * (x - mean)) * +// (np.mean(dy, axis=(n,h,w)) - dy) + inv_var.pow(3) / NxHxW * +// np.sum(dy, +// axis=(n,h,w)) * (x - mean) * +// (np.mean(ddx, axis=(n,h,w)) - ddx) + ddr * (dy * inv_var - +// inv_var +// * +// np.mean(dy, axis=(n,h,w)) - +// inv_var.pow(3) * (x - mean) * np.mean(dy * (x - mean), +// axis=(n,h,w)))) + +template +__global__ void DoubleGradComputeDX(const T *x, const T *mean, + const T *variance, const T *ddx, + const T *dy, const T *scale, + const T *ddscale, const int N, const int C, + const int sample_size, const double epsilon, + T *dx) { + const int outer_size = C; + const int inner_size = N * sample_size; + + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage dy_storage; + __shared__ typename BlockReduce::TempStorage ddx_storage; + __shared__ typename BlockReduce::TempStorage dy_mul_ddx_storage; + __shared__ typename BlockReduce::TempStorage dy_mul_x_sub_mean_storage; + __shared__ typename BlockReduce::TempStorage ddx_mul_x_sub_mean_storage; + __shared__ T dy_sum_val; + __shared__ T ddx_sum_val; + __shared__ T dy_mul_ddx_sum_val; + __shared__ T dy_mul_x_sub_mean_sum_val; + __shared__ T ddx_mul_x_sub_mean_sum_val; + + for (int i = blockIdx.x; i < outer_size; i += gridDim.x) { + T mean_val = mean[i]; + T var_val = variance[i]; + T dy_sum = 0; 
+ T ddx_sum = 0; + T dy_mul_ddx_sum = 0; + T dy_mul_x_sub_mean_sum = 0; + T ddx_mul_x_sub_mean_sum = 0; + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + const int index = + layout == framework::DataLayout::kNCHW + ? (j / sample_size * C + i) * sample_size + j % sample_size + : j * outer_size + i; + T ddx_i = ddx[index]; + T dy_i = dy[index]; + T tmp = x[index] - mean_val; + + dy_sum += dy_i; + ddx_sum += ddx_i; + dy_mul_ddx_sum += (ddx_i * dy_i); + + dy_mul_x_sub_mean_sum += (dy_i * tmp); + ddx_mul_x_sub_mean_sum += (ddx_i * tmp); + } + + dy_sum = BlockReduce(dy_storage).Reduce(dy_sum, cub::Sum()); + ddx_sum = BlockReduce(ddx_storage).Reduce(ddx_sum, cub::Sum()); + dy_mul_ddx_sum = + BlockReduce(dy_mul_ddx_storage).Reduce(dy_mul_ddx_sum, cub::Sum()); + dy_mul_x_sub_mean_sum = BlockReduce(dy_mul_x_sub_mean_storage) + .Reduce(dy_mul_x_sub_mean_sum, cub::Sum()); + ddx_mul_x_sub_mean_sum = BlockReduce(ddx_mul_x_sub_mean_storage) + .Reduce(ddx_mul_x_sub_mean_sum, cub::Sum()); + + if (threadIdx.x == 0) { + dy_sum_val = dy_sum; + ddx_sum_val = ddx_sum; + dy_mul_ddx_sum_val = dy_mul_ddx_sum; + dy_mul_x_sub_mean_sum_val = dy_mul_x_sub_mean_sum; + ddx_mul_x_sub_mean_sum_val = ddx_mul_x_sub_mean_sum; + } + __syncthreads(); + + if (ddx != nullptr) { + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + const int index = + layout == framework::DataLayout::kNCHW + ? (j / sample_size * C + i) * sample_size + j % sample_size + : j * outer_size + i; + dx[index] += + ((x[index] - mean_val) * var_val * var_val * var_val / inner_size * + (ddx_sum_val * dy_sum_val / inner_size - dy_mul_ddx_sum_val + + 3. 
* dy_mul_x_sub_mean_sum_val * var_val * + ddx_mul_x_sub_mean_sum_val * var_val / inner_size) + + ddx_mul_x_sub_mean_sum_val * var_val / inner_size * var_val * + var_val * (dy_sum_val / inner_size - dy[index]) + + dy_mul_x_sub_mean_sum_val * var_val / inner_size * var_val * + var_val * (ddx_sum_val / inner_size - ddx[index])) * + scale[i]; + } + } + __syncthreads(); + if (ddscale != nullptr) { + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + const int index = + layout == framework::DataLayout::kNCHW + ? (j / sample_size * C + i) * sample_size + j % sample_size + : j * outer_size + i; + dx[index] += (dy[index] * var_val - dy_sum_val / inner_size * var_val - + (x[index] - mean_val) * var_val * + dy_mul_x_sub_mean_sum_val * var_val / inner_size) * + ddscale[i]; + } + } + } +} + +// math: ddy = (x - mean) * inv_var * ddscale + ddbias + +// scale * inv_var * (ddx - (x - mean) * inv_var.pow(2) * +// np.mean(ddx * (x - mean), axis=(n,h,w))) +template +__global__ void DoubleGradComputeDDY(const T *x, const T *mean, + const T *variance, const T *ddscale, + const T *ddbias, const T *ddx, + const T *scale, const int N, const int C, + const int sample_size, + const double epsilon, T *ddy) { + const int outer_size = C; + const int inner_size = N * sample_size; + + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage ddx_storage; + __shared__ typename BlockReduce::TempStorage ddx_mul_x_sub_mean_storage; + __shared__ T ddx_sum_val; + __shared__ T ddx_mul_x_sub_mean_sum_val; + + for (int i = blockIdx.x; i < outer_size; i += gridDim.x) { + T mean_val = mean[i]; + T var_val = variance[i]; + T ddx_sum = 0; + T ddx_mul_x_sub_mean_sum = 0; + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + const int index = + layout == framework::DataLayout::kNCHW + ? 
(j / sample_size * C + i) * sample_size + j % sample_size + : j * outer_size + i; + T ddx_i = ddx[index]; + ddx_sum += ddx_i; + ddx_mul_x_sub_mean_sum += (ddx_i * (x[index] - mean_val)); + } + ddx_sum = BlockReduce(ddx_storage).Reduce(ddx_sum, cub::Sum()); + ddx_mul_x_sub_mean_sum = BlockReduce(ddx_mul_x_sub_mean_storage) + .Reduce(ddx_mul_x_sub_mean_sum, cub::Sum()); + + if (threadIdx.x == 0) { + ddx_sum_val = ddx_sum; + ddx_mul_x_sub_mean_sum_val = ddx_mul_x_sub_mean_sum; + } + __syncthreads(); + + if (ddx != nullptr) { + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + const int index = + layout == framework::DataLayout::kNCHW + ? (j / sample_size * C + i) * sample_size + j % sample_size + : j * outer_size + i; + ddy[index] += scale[i] * var_val * + (ddx[index] - ddx_sum_val / inner_size - + (x[index] - mean_val) * var_val * + ddx_mul_x_sub_mean_sum_val * var_val / inner_size); + } + } + __syncthreads(); + if (ddscale != nullptr) { + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + const int index = + layout == framework::DataLayout::kNCHW + ? (j / sample_size * C + i) * sample_size + j % sample_size + : j * outer_size + i; + ddy[index] += (x[index] - mean_val) * var_val * ddscale[i]; + } + } + __syncthreads(); + if (ddbias != nullptr) { + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + const int index = + layout == framework::DataLayout::kNCHW + ? 
(j / sample_size * C + i) * sample_size + j % sample_size + : j * outer_size + i; + ddy[index] += ddbias[i]; + } + } + } +} + +// math: dscale = inv_var * (dy - np.mean(dy, axis=(n,h,w) - (x-mean) * +// inv_var.pow(2) * np.mean(dy * (x-mean), axis=(n,h,w)))) * +// ddx +template +__global__ void DoubleGradComputeDScale(const T *x, const T *mean, + const T *variance, const T *ddx, + const T *dy, const int N, const int C, + const int sample_size, + const double epsilon, T *dscale) { + const int outer_size = C; + const int inner_size = N * sample_size; + + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage dy_storage; + __shared__ typename BlockReduce::TempStorage dy_mul_x_sub_mean_storage; + __shared__ typename BlockReduce::TempStorage dscale_tmp_storage; + __shared__ T dy_sum_val; + __shared__ T dy_mul_x_sub_mean_sum_val; + + for (int i = blockIdx.x; i < outer_size; i += gridDim.x) { + T dy_sum = 0; + T dy_mul_x_sub_mean_sum = 0; + T mean_val = mean[i]; + T var_val = variance[i]; + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + const int index = + layout == framework::DataLayout::kNCHW + ? (j / sample_size * C + i) * sample_size + j % sample_size + : j * outer_size + i; + T dy_i = dy[index]; + dy_sum += dy_i; + dy_mul_x_sub_mean_sum += (dy_i * (x[index] - mean_val)); + } + dy_sum = BlockReduce(dy_storage).Reduce(dy_sum, cub::Sum()); + dy_mul_x_sub_mean_sum = BlockReduce(dy_mul_x_sub_mean_storage) + .Reduce(dy_mul_x_sub_mean_sum, cub::Sum()); + + if (threadIdx.x == 0) { + dy_sum_val = dy_sum; + dy_mul_x_sub_mean_sum_val = dy_mul_x_sub_mean_sum; + } + __syncthreads(); + + if (ddx != nullptr) { + T dscale_tmp = 0; + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + const int index = + layout == framework::DataLayout::kNCHW + ? 
(j / sample_size * C + i) * sample_size + j % sample_size + : j * outer_size + i; + dscale_tmp += ddx[index] * var_val * + (dy[index] - dy_sum_val / inner_size - + dy_mul_x_sub_mean_sum_val * (x[index] - mean_val) * + var_val * var_val / inner_size); + } + dscale_tmp = + BlockReduce(dscale_tmp_storage).Reduce(dscale_tmp, cub::Sum()); + + if (threadIdx.x == 0) { + dscale[i] += dscale_tmp; + } + __syncthreads(); + } + } +} + +// math: dscale = np.sum(ddx * dy, axis=(n,h,w)) * inv_var +template +__global__ void DoubleGradComputeDScaleWithGlobal( + const T *ddx, const T *variance, const T *dy, const double epsilon, + const int N, const int C, const int sample_size, T *dscale) { + int outer_size = C; + int inner_size = N * sample_size; + typedef cub::BlockReduce BlockReduce; + __shared__ typename BlockReduce::TempStorage ddx_mul_dy_storage; + __shared__ T ddx_mul_dy_sum_val; + for (int i = blockIdx.x; i < outer_size; i += gridDim.x) { + T inv_var_i = 1.0 / sqrt(variance[i] + epsilon); + T ddx_mul_dy_sum = 0; + for (int j = threadIdx.x; j < inner_size; j += blockDim.x) { + const int index = + layout == framework::DataLayout::kNCHW + ? 
(j / sample_size * C + i) * sample_size + j % sample_size + : j * outer_size + i; + T ddx_i = ddx[index]; + T dy_i = dy[index]; + ddx_mul_dy_sum += (ddx_i * dy_i); + } + ddx_mul_dy_sum = + BlockReduce(ddx_mul_dy_storage).Reduce(ddx_mul_dy_sum, cub::Sum()); + if (threadIdx.x == 0) { + ddx_mul_dy_sum_val = ddx_mul_dy_sum; + } + __syncthreads(); + + if (ddx != nullptr) { + dscale[i] = inv_var_i * ddx_mul_dy_sum_val; + } + } +} + +// math: dx = ddscale * dy * inv_var +// math: ddy = scale * ddx * inv_var +template +__global__ void DoubleGradComputeDataWithGlobal( + const T *dy, const T *scale, const T *variance, const double epsilon, + const int C, const int sample_size, const int num, T *dx) { + int gid = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + if (scale != nullptr) { + for (int i = gid; i < num; i += stride) { + const int c = + layout == framework::DataLayout::kNCHW ? i / sample_size % C : i % C; + T inv_var = 1.0 / sqrt(variance[c] + epsilon); + dx[i] = dy[i] * scale[c] * inv_var; + } + } +} + +template +void NormDoubleGradFunctor(const framework::ExecutionContext &ctx, + const DataLayout data_layout, const Tensor *X, + const Tensor *Scale, const Tensor *dY, + const Tensor *Saved_mean, + const Tensor *Saved_variance, const double epsilon, + const bool use_global_stats, const Tensor *ddX, + const Tensor *ddScale, const Tensor *ddBias, + Tensor *dX, Tensor *dScale, Tensor *ddY) { + const T *x_data = X->data(); + const T *dy_data = dY->data(); + const T *ddx_data = (ddX == nullptr ? nullptr : ddX->data()); + + const T *ddscale_data = (ddScale == nullptr ? nullptr : ddScale->data()); + const T *ddbias_data = (ddBias == nullptr ? nullptr : ddBias->data()); + + auto &dev_ctx = ctx.template device_context(); + math::SetConstant set_constant; + + auto &x_dims = X->dims(); + const int C = (data_layout == DataLayout::kNCHW ? 
x_dims[1] + : x_dims[x_dims.size() - 1]); + const int N = x_dims[0]; + const int num = X->numel(); + const int sample_size = num / N / C; + Tensor scale_tmp; + if (!Scale) { + scale_tmp.mutable_data({C}, ctx.GetPlace()); + set_constant(dev_ctx, &scale_tmp, static_cast(1)); + } + const T *scale_data = Scale ? Scale->data() : scale_tmp.data(); + + const int block = 512; + int max_threads = dev_ctx.GetMaxPhysicalThreadCount(); + const int max_blocks = std::max(max_threads / block, 1); + int grid = std::min(C, max_blocks); + int grid1 = (num + block - 1) / block; + + const T *mean_data, *variance_data; + if (use_global_stats) { + const auto *running_var = ctx.Input("Variance"); + const auto *running_var_data = running_var->template data(); + variance_data = running_var_data; + } else { + const T *smean_data = Saved_mean->data(); + const T *svariance_data = Saved_variance->data(); + mean_data = smean_data; + variance_data = svariance_data; + } + + if (dX) { + T *dx_data = dX->mutable_data(ctx.GetPlace()); + set_constant(dev_ctx, dX, static_cast(0)); + if (use_global_stats) { + if (data_layout == DataLayout::kNHWC) { + DoubleGradComputeDataWithGlobal< + T, DataLayout::kNHWC><<>>( + dy_data, ddscale_data, variance_data, epsilon, C, sample_size, num, + dx_data); + } else { + DoubleGradComputeDataWithGlobal< + T, DataLayout::kNCHW><<>>( + dy_data, ddscale_data, variance_data, epsilon, C, sample_size, num, + dx_data); + } + } else { + if (data_layout == DataLayout::kNHWC) { + DoubleGradComputeDX< + T, block, DataLayout::kNHWC><<>>( + x_data, mean_data, variance_data, ddx_data, dy_data, scale_data, + ddscale_data, N, C, sample_size, epsilon, dx_data); + } else { + DoubleGradComputeDX< + T, block, DataLayout::kNCHW><<>>( + x_data, mean_data, variance_data, ddx_data, dy_data, scale_data, + ddscale_data, N, C, sample_size, epsilon, dx_data); + } + } + } + if (dScale) { + T *dscale_data = dScale->mutable_data(ctx.GetPlace()); + set_constant(dev_ctx, dScale, static_cast(0)); + if 
(use_global_stats) { + if (data_layout == DataLayout::kNHWC) { + DoubleGradComputeDScaleWithGlobal< + T, block, DataLayout::kNHWC><<>>( + ddx_data, variance_data, dy_data, epsilon, N, C, sample_size, + dscale_data); + } else { + DoubleGradComputeDScaleWithGlobal< + T, block, DataLayout::kNCHW><<>>( + ddx_data, variance_data, dy_data, epsilon, N, C, sample_size, + dscale_data); + } + } else { + if (data_layout == DataLayout::kNHWC) { + DoubleGradComputeDScale< + T, block, DataLayout::kNHWC><<>>( + x_data, mean_data, variance_data, ddx_data, dy_data, N, C, + sample_size, epsilon, dscale_data); + } else { + DoubleGradComputeDScale< + T, block, DataLayout::kNCHW><<>>( + x_data, mean_data, variance_data, ddx_data, dy_data, N, C, + sample_size, epsilon, dscale_data); + } + } + } + if (ddY) { + T *ddy_data = ddY->mutable_data(ctx.GetPlace()); + set_constant(dev_ctx, ddY, static_cast(0)); + if (use_global_stats) { + if (data_layout == DataLayout::kNHWC) { + DoubleGradComputeDataWithGlobal< + T, DataLayout::kNHWC><<>>( + ddx_data, scale_data, variance_data, epsilon, C, sample_size, num, + ddy_data); + } else { + DoubleGradComputeDataWithGlobal< + T, DataLayout::kNCHW><<>>( + ddx_data, scale_data, variance_data, epsilon, C, sample_size, num, + ddy_data); + } + } else { + if (data_layout == DataLayout::kNHWC) { + DoubleGradComputeDDY< + T, block, DataLayout::kNHWC><<>>( + x_data, mean_data, variance_data, ddscale_data, ddbias_data, + ddx_data, scale_data, N, C, sample_size, epsilon, ddy_data); + } else { + DoubleGradComputeDDY< + T, block, DataLayout::kNCHW><<>>( + x_data, mean_data, variance_data, ddscale_data, ddbias_data, + ddx_data, scale_data, N, C, sample_size, epsilon, ddy_data); + } + } + } +} + +} // namespace operators +} // namespace paddle diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 4a750f301a0..3e7d10f8d1a 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -3167,7 +3167,7 @@ def 
instance_norm(input, param_shape = [channel_num] - if param_attr and bias_attr: + if param_attr != False and bias_attr != False: # create parameter scale = helper.create_parameter( attr=helper.param_attr, @@ -3190,7 +3190,7 @@ def instance_norm(input, instance_norm_out = helper.create_variable_for_type_inference(dtype) inputs = {"X": input} - if param_attr and bias_attr: + if param_attr != False and bias_attr != False: inputs["Scale"] = scale inputs["Bias"] = bias diff --git a/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py b/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py index 720c9f95c25..39c6fca89cc 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_double_grad.py @@ -346,7 +346,7 @@ class TestRaiseNoDoubleGradOp(TestCase): with fluid.dygraph.guard(): x = fluid.layers.ones(shape=[2, 3, 2, 2], dtype='float32') x.stop_gradient = False - y = paddle.fluid.layers.batch_norm(x) + y = paddle.fluid.layers.group_norm(x, groups=1) dx = fluid.dygraph.grad( outputs=[y], inputs=[x], create_graph=True, diff --git a/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py b/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py index c44ea454271..a89b9fde7f9 100644 --- a/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py @@ -68,5 +68,67 @@ class TestInstanceNormDoubleGradCheckWithoutParamBias( [x], z, x_init=x_arr, atol=atol, place=place, eps=eps) +class TestBatchNormDoubleGradCheck(unittest.TestCase): + def setUp(self): + self.init_test() + + def init_test(self): + self.data_layout = 'NCHW' + self.use_global_stats = False + self.shape = [2, 3, 4, 5] + + @prog_scope() + def func(self, place): + prog = fluid.Program() + with fluid.program_guard(prog): + np.random.seed() + dtype = "float32" + eps = 0.005 + atol = 1e-4 + x = layers.create_parameter(dtype=dtype, shape=self.shape, 
name='x') + z = fluid.layers.batch_norm( + input=x, + data_layout=self.data_layout, + use_global_stats=self.use_global_stats) + x_arr = np.random.uniform(-1, 1, self.shape).astype(dtype) + gradient_checker.double_grad_check( + [x], z, x_init=x_arr, atol=atol, place=place, eps=eps) + + def test_grad(self): + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + +class TestBatchNormDoubleGradCheckCase1(TestBatchNormDoubleGradCheck): + def init_test(self): + self.data_layout = 'NHWC' + self.use_global_stats = False + self.shape = [2, 3, 4, 5] + + +class TestBatchNormDoubleGradCheckCase2(TestBatchNormDoubleGradCheck): + def init_test(self): + self.data_layout = 'NCHW' + self.use_global_stats = True + self.shape = [2, 3, 4, 5] + + +class TestBatchNormDoubleGradCheckCase3(TestBatchNormDoubleGradCheck): + def init_test(self): + self.data_layout = 'NHWC' + self.use_global_stats = True + self.shape = [2, 3, 4, 5] + + +class TestBatchNormDoubleGradCheckCase4(TestBatchNormDoubleGradCheck): + def init_test(self): + self.data_layout = 'NCHW' + self.use_global_stats = False + self.shape = [2, 2, 3, 4, 5] + + if __name__ == "__main__": unittest.main() -- GitLab From f11a53ee7626138e3d096c4c58953f78fa39dc3e Mon Sep 17 00:00:00 2001 From: LutaoChu <30695251+LutaoChu@users.noreply.github.com> Date: Mon, 21 Sep 2020 21:26:30 +0800 Subject: [PATCH 026/117] Optimize argsort Op performance on GPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * argsort op acceleration on GPU when the input size is equal to the length of the ‘axis’ dimension --- paddle/fluid/operators/argsort_op.cu | 86 ++++++++++++--- .../fluid/tests/unittests/test_argsort_op.py | 100 +++++++++++++----- 2 files changed, 142 insertions(+), 44 deletions(-) diff --git a/paddle/fluid/operators/argsort_op.cu b/paddle/fluid/operators/argsort_op.cu index cbd7e33bc6b..7fc2a92b7d9 100644 --- 
a/paddle/fluid/operators/argsort_op.cu +++ b/paddle/fluid/operators/argsort_op.cu @@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include #include +#include #include #include "cub/cub.cuh" #include "paddle/fluid/framework/op_registry.h" @@ -58,6 +60,16 @@ static __global__ void FillIndex(T* indices, T num_rows, T num_cols) { } } +template +static __global__ void FillFlattenGrad(const T* dO, const IndType* indices, + int64_t size, T* dX) { + int64_t index = threadIdx.x + blockIdx.x * blockDim.x; + int64_t stride = blockDim.x * gridDim.x; + for (int64_t i = index; i < size; i += stride) { + dX[indices[i]] = dO[i]; + } +} + template static __global__ void FillGrad(const T* dO, const IndType* indices, T* dX, IndType num_rows, IndType num_cols) { @@ -193,6 +205,23 @@ void ArgFullAssign(const platform::CUDADeviceContext& ctx, const Tensor* dO, } template +void ArgFlattenAssign(const platform::CUDADeviceContext& ctx, const Tensor* dO, + const Tensor* indices, int64_t size, Tensor* dX) { + auto cu_stream = ctx.stream(); + + const int64_t block_size = + std::min(size, static_cast(ctx.GetMaxThreadsPerBlock())); + int64_t max_threads = ctx.GetMaxPhysicalThreadCount(); + const int64_t max_blocks = + std::max(((max_threads - 1) / block_size + 1), static_cast(1)); + const int64_t grid_size = + std::min(max_blocks, (size + block_size - 1) / block_size); + + FillFlattenGrad<<>>( + dO->data(), indices->data(), size, dX->data()); +} + +template class ArgsortOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { @@ -205,8 +234,25 @@ class ArgsortOpCUDAKernel : public framework::OpKernel { auto in_dims = input->dims(); axis = (axis < 0) ? 
(in_dims.size() + axis) : axis; - int64_t numel = input->numel(); - int64_t groups = numel / in_dims[axis]; + const T* in_data = input->data(); + auto size = input->numel(); + T* out_data = output->mutable_data(ctx.GetPlace()); + int64_t* ids_data = indices->mutable_data(ctx.GetPlace()); + + // Use thrust for parallel acceleration when the input size is equal to the + // length of the ‘axis’ dimension. + // Compared to the following 'Special case for full sort', ascending sort is + // 34 times faster and descending sort is 31 times faster. + if (size == in_dims[axis]) { + thrust::sequence(thrust::device, ids_data, ids_data + size); + thrust::copy(thrust::device, in_data, in_data + size, out_data); + thrust::sort_by_key(thrust::device, out_data, out_data + size, ids_data); + if (descending) { + thrust::reverse(thrust::device, out_data, out_data + size); + thrust::reverse(thrust::device, ids_data, ids_data + size); + } + return; + } // Special case for full sort, speedup ~190x. if (axis == -1 || axis + 1 == in_dims.size()) { @@ -276,23 +322,28 @@ class ArgsortGradOpCUDAKernel : public framework::OpKernel { int axis = ctx.Attr("axis"); dX->mutable_data(ctx.GetPlace()); - auto dxt = framework::EigenVector::Flatten(*dX); - auto& place = *ctx.template device_context() - .eigen_device(); - dxt.device(place) = dxt.constant(static_cast(0)); if (dO->numel() == 0) return; - auto in_dims = indices->dims(); + auto in_dims = dX->dims(); axis = (axis < 0) ? (in_dims.size() + axis) : axis; - int64_t numel = indices->numel(); + int64_t size = dX->numel(); + const auto& dev_ctx = ctx.cuda_device_context(); + + // Parallel acceleration when the input size is equal to the length of the + // ‘axis’ dimension. + // Compared to 'special case for full sort' below, the gradient calculation + // is 10 times faster. + if (size == in_dims[axis]) { + ArgFlattenAssign(dev_ctx, dO, indices, size, dX); + return; + } // Special case for full sort, speedup ~190x. 
if (axis == -1 || axis + 1 == in_dims.size()) { const int64_t input_height = framework::product( framework::slice_ddim(in_dims, 0, in_dims.size() - 1)); const int64_t input_width = in_dims[in_dims.size() - 1]; - const auto& dev_ctx = ctx.cuda_device_context(); ArgFullAssign(dev_ctx, dO, indices, dX, input_height, input_width); } else { @@ -316,7 +367,6 @@ class ArgsortGradOpCUDAKernel : public framework::OpKernel { Tensor trans_ind; trans_ind.mutable_data(trans_dims, ctx.GetPlace()); int ndims = trans.size(); - const auto& dev_ctx = ctx.cuda_device_context(); // Do transpose TransCompute(ndims, dev_ctx, *dO, &trans_dO, trans); @@ -345,11 +395,17 @@ class ArgsortGradOpCUDAKernel : public framework::OpKernel { } // namespace paddle REGISTER_OP_CUDA_KERNEL( - argsort, paddle::operators::ArgsortOpCUDAKernel, - paddle::operators::ArgsortOpCUDAKernel, - paddle::operators::ArgsortOpCUDAKernel, - paddle::operators::ArgsortOpCUDAKernel, - paddle::operators::ArgsortOpCUDAKernel); + argsort, + paddle::operators::ArgsortOpCUDAKernel, + paddle::operators::ArgsortOpCUDAKernel, + paddle::operators::ArgsortOpCUDAKernel, + paddle::operators::ArgsortOpCUDAKernel, + paddle::operators::ArgsortOpCUDAKernel); REGISTER_OP_CUDA_KERNEL( argsort_grad, paddle::operators::ArgsortGradOpCUDAKernel, paddle::operators::ArgsortGradOpCUDAKernel, diff --git a/python/paddle/fluid/tests/unittests/test_argsort_op.py b/python/paddle/fluid/tests/unittests/test_argsort_op.py index 2a8e0e6c7f0..e324f0ec3d3 100644 --- a/python/paddle/fluid/tests/unittests/test_argsort_op.py +++ b/python/paddle/fluid/tests/unittests/test_argsort_op.py @@ -348,57 +348,99 @@ class TestArgsortErrorOnGPU(TestArgsortErrorOnCPU): class TestArgsort(unittest.TestCase): + def init(self): + self.input_shape = [10000, ] + self.axis = 0 + def setUp(self): + self.init() if core.is_compiled_with_cuda(): self.place = core.CUDAPlace(0) else: self.place = core.CPUPlace() - self.data = np.random.rand(2, 3, 4).astype("float32") + self.data = 
np.random.rand(*self.input_shape) - def test_api_0(self): + def test_api(self): with fluid.program_guard(fluid.Program()): - input = fluid.data(name="input", shape=[2, 3, 4], dtype="float32") - output = paddle.argsort(x=input) - exe = fluid.Executor(self.place) - result, = exe.run(feed={'input': self.data}, fetch_list=[output]) - np_result = np.argsort(self.data) - self.assertEqual((result == np_result).all(), True) + input = fluid.data( + name="input", shape=self.input_shape, dtype="float64") + + output = paddle.argsort(input, axis=self.axis) + output2 = paddle.argsort(input, axis=self.axis, descending=True) - def test_api_1(self): - with fluid.program_guard(fluid.Program()): - input = fluid.data(name="input", shape=[2, 3, 4], dtype="float32") - output = paddle.argsort(x=input, axis=1) exe = fluid.Executor(self.place) - result, = exe.run(feed={'input': self.data}, fetch_list=[output]) - np_result = np.argsort(self.data, axis=1) + result, result2 = exe.run(feed={'input': self.data}, + fetch_list=[output, output2]) + + np_result = np.argsort(self.data, axis=self.axis) self.assertEqual((result == np_result).all(), True) + np_result2 = np.argsort(-self.data, axis=self.axis) + self.assertEqual((result2 == np_result2).all(), True) + + +class TestArgsort2(TestArgsort): + def init(self): + self.input_shape = [10000, 1] + self.axis = 0 + + +class TestArgsort3(TestArgsort): + def init(self): + self.input_shape = [1, 10000] + self.axis = 1 + + +class TestArgsort4(TestArgsort): + def init(self): + self.input_shape = [2, 3, 4] + self.axis = 1 + + +class TestArgsortImperative(unittest.TestCase): + def init(self): + self.input_shape = [10000, ] + self.axis = 0 -class TestArgsortDygraph(unittest.TestCase): def setUp(self): - self.input_data = np.random.rand(10, 10) + self.init() + self.input_data = np.random.rand(*self.input_shape) if core.is_compiled_with_cuda(): self.place = core.CUDAPlace(0) else: self.place = core.CPUPlace() - def test_api_0(self): + def test_api(self): 
paddle.disable_static(self.place) - var_x = paddle.to_variable(self.input_data) - out = paddle.argsort(var_x) - self.assertEqual((np.argsort(self.input_data) == out.numpy()).all(), - True) - paddle.enable_static() + var_x = paddle.to_tensor(self.input_data) + out = paddle.argsort(var_x, axis=self.axis) + expect = np.argsort(self.input_data, axis=self.axis) + self.assertEqual((expect == out.numpy()).all(), True) + + out2 = paddle.argsort(var_x, axis=self.axis, descending=True) + expect2 = np.argsort(-self.input_data, axis=self.axis) + self.assertEqual((expect2 == out2.numpy()).all(), True) - def test_api_1(self): - paddle.disable_static(self.place) - var_x = paddle.to_variable(self.input_data) - out = paddle.argsort(var_x, axis=-1) - self.assertEqual( - (np.argsort( - self.input_data, axis=-1) == out.numpy()).all(), True) paddle.enable_static() +class TestArgsortImperative2(TestArgsortImperative): + def init(self): + self.input_shape = [10000, 1] + self.axis = 0 + + +class TestArgsortImperative3(TestArgsortImperative): + def init(self): + self.input_shape = [1, 10000] + self.axis = 1 + + +class TestArgsortImperative4(TestArgsortImperative): + def init(self): + self.input_shape = [2, 3, 4] + self.axis = 1 + + if __name__ == "__main__": unittest.main() -- GitLab From 13a4c74efd34e3ec8445792642c633f20ae331fb Mon Sep 17 00:00:00 2001 From: furnace <34057289+windstamp@users.noreply.github.com> Date: Mon, 21 Sep 2020 22:00:01 +0800 Subject: [PATCH 027/117] add mv op(c++, python, unit test) (#27024) --- paddle/fluid/operators/mv_op.cc | 125 ++++++++++++++++++ paddle/fluid/operators/mv_op.cu | 95 +++++++++++++ paddle/fluid/operators/mv_op.h | 105 +++++++++++++++ python/paddle/__init__.py | 1 + .../fluid/tests/unittests/test_mv_op.py | 94 +++++++++++++ python/paddle/tensor/__init__.py | 1 + python/paddle/tensor/linalg.py | 64 ++++++++- 7 files changed, 484 insertions(+), 1 deletion(-) create mode 100644 
paddle/fluid/operators/mv_op.cu create mode 100644 paddle/fluid/operators/mv_op.h create mode 100644 python/paddle/fluid/tests/unittests/test_mv_op.py diff --git a/paddle/fluid/operators/mv_op.cc b/paddle/fluid/operators/mv_op.cc new file mode 100644 index 00000000000..1339982adaa --- /dev/null +++ b/paddle/fluid/operators/mv_op.cc @@ -0,0 +1,125 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/mv_op.h" +namespace paddle { +namespace operators { + +class MVOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", "The matrix input of mv op"); + AddInput("Vec", "The vector input of mv op"); + AddOutput("Out", "The output of mv op"); + AddComment(R"DOC( +MV Operator. + +This operator is used to perform matrix vector multiplication +of the input tensors `X` and `Vec`. 
+)DOC"); + } +}; + +class MVOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext *context) const override { + OP_INOUT_CHECK(context->HasInput("X"), "Input", "X", "mv"); + OP_INOUT_CHECK(context->HasInput("Vec"), "Input", "Vec", "mv"); + OP_INOUT_CHECK(context->HasOutput("Out"), "Output", "Out", "mv"); + + auto dim_x = context->GetInputDim("X"); + auto dim_y = context->GetInputDim("Vec"); + PADDLE_ENFORCE_EQ( + dim_x.size(), 2, + platform::errors::InvalidArgument( + "The rank of input X should be 2, but is %d", dim_x.size())); + PADDLE_ENFORCE_EQ( + dim_y.size(), 1, + platform::errors::InvalidArgument( + "The rank of input Vec should be 1, but is %d", dim_y.size())); + PADDLE_ENFORCE_EQ(dim_x[1] == dim_y[0], true, + platform::errors::InvalidArgument( + "The length of input X' second dim should equal the " + "length of input Vec," + " but X[%d, %d], Vec[%d]", + dim_x[0], dim_x[1], dim_y[0])); + + framework::DDim dim_out = framework::make_ddim({dim_x[0]}); + + context->SetOutputDim("Out", dim_out); + context->ShareLoD("X", /*->*/ "Out"); + } +}; + +template +class MVOpGradMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr retv) const override { + retv->SetType("mv_grad"); + retv->SetInput("X", this->Input("X")); + retv->SetInput("Vec", this->Input("Vec")); + retv->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); + retv->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); + retv->SetOutput(framework::GradVarName("Vec"), this->InputGrad("Vec")); + } +}; + +class MVOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(framework::InferShapeContext *context) const override { + OP_INOUT_CHECK(context->HasInput("X"), "Input", "X", 
"mv"); + OP_INOUT_CHECK(context->HasInput("Vec"), "Input", "Vec", "mv"); + OP_INOUT_CHECK(context->HasInput(framework::GradVarName("Out")), "Input", + "Out@GRAD", "mv"); + auto x_dims = context->GetInputDim("X"); + auto vec_dims = context->GetInputDim("Vec"); + + auto x_grad_name = framework::GradVarName("X"); + auto vec_grad_name = framework::GradVarName("Vec"); + + if (context->HasOutput(x_grad_name)) { + context->SetOutputDim(x_grad_name, x_dims); + } + if (context->HasOutput(vec_grad_name)) { + context->SetOutputDim(vec_grad_name, vec_dims); + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OPERATOR(mv, ops::MVOp, ops::MVOpMaker, + ops::MVOpGradMaker, + ops::MVOpGradMaker); +REGISTER_OPERATOR(mv_grad, ops::MVOpGrad); + +REGISTER_OP_CPU_KERNEL( + mv, ops::MVKernel, + ops::MVKernel); +REGISTER_OP_CPU_KERNEL( + mv_grad, ops::MVGradKernel, + ops::MVGradKernel); diff --git a/paddle/fluid/operators/mv_op.cu b/paddle/fluid/operators/mv_op.cu new file mode 100644 index 00000000000..9a16fe025cd --- /dev/null +++ b/paddle/fluid/operators/mv_op.cu @@ -0,0 +1,95 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/mv_op.h" +#include "paddle/fluid/platform/gpu_launch_param_config.h" + +namespace paddle { +namespace operators { + +template +__global__ void MVGradCUDAKernel(const int m, const int n, const T *dout, + const T *vec, T *dx) { + int idx = blockDim.x * blockIdx.x + threadIdx.x; + for (; idx < m * n; idx += blockDim.x * gridDim.x) { + int i = idx / n; + int j = idx % n; + dx[idx] = dout[i] * vec[j]; + } +} + +// Using dimensional constraints on matrix multiplication, it is +// straight-forward to check the following table for when X and Y +// are both matrices. +// +// dX = | dOut Vec^T +// dVec = | X^T dOut +template +class MVGradKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &context) const override { + auto *x = context.Input("X"); + auto *vec = context.Input("Vec"); + auto *dout = + context.Input(framework::GradVarName("Out")); + auto *dx = context.Output(framework::GradVarName("X")); + auto *dvec = + context.Output(framework::GradVarName("Vec")); + + auto dim_x = x->dims(); + int m = dim_x[0]; + int n = dim_x[1]; + + dx->Resize(framework::make_ddim({m * n})); + + // get data ptr + const T *x_data = x->data(); + const T *vec_data = vec->data(); + const T *dout_data = dout->data(); + + T *dx_data = dx->mutable_data(context.GetPlace()); + T *dvec_data = dvec->mutable_data(context.GetPlace()); + + auto &dev_ctx = + context.template device_context(); + auto blas = math::GetBlas(dev_ctx); + + // calculate dx + auto stream = context.cuda_device_context().stream(); + auto config = GetGpuLaunchConfig1D(dev_ctx, m * n); + MVGradCUDAKernel< + T><<>>( + m, n, dout_data, vec_data, dx_data); + + dx->Resize(framework::make_ddim({m, n})); + + // calculate dvec + blas.GEMV(true, dim_x[0], dim_x[1], static_cast(1), x_data, dout_data, + static_cast(0), dvec_data); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = 
paddle::platform; + +REGISTER_OP_CUDA_KERNEL( + mv, ops::MVKernel, + ops::MVKernel); +REGISTER_OP_CUDA_KERNEL( + mv_grad, ops::MVGradKernel, + ops::MVGradKernel); diff --git a/paddle/fluid/operators/mv_op.h b/paddle/fluid/operators/mv_op.h new file mode 100644 index 00000000000..3c63f3640ff --- /dev/null +++ b/paddle/fluid/operators/mv_op.h @@ -0,0 +1,105 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/blas.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +class MVKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &context) const override { + auto *x = context.Input("X"); + auto *vec = context.Input("Vec"); + + auto *out = context.Output("Out"); + + auto dim_x = x->dims(); + + // get data ptr + const T *x_data = x->data(); + const T *vec_data = vec->data(); + T *out_data = out->mutable_data(context.GetPlace()); + + auto &dev_ctx = context.template device_context(); + auto blas = math::GetBlas(dev_ctx); + + blas.GEMV(false, dim_x[0], dim_x[1], static_cast(1), x_data, vec_data, + static_cast(0), out_data); + } +}; + +// Using dimensional constraints on matrix multiplication, it is +// straight-forward to check 
the following table for when X and Y +// are both matrices. +// +// dX = | dOut vec^T +// dVec = | X^T dOut +template +class MVGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &context) const override { + auto *x = context.Input("X"); + auto *vec = context.Input("Vec"); + auto *dout = + context.Input(framework::GradVarName("Out")); + auto *dx = context.Output(framework::GradVarName("X")); + auto *dvec = + context.Output(framework::GradVarName("Vec")); + + auto dim_x = x->dims(); + int m = dim_x[0]; + int n = dim_x[1]; + + dx->Resize(framework::make_ddim({m * n})); + + // get data ptr + const T *x_data = x->data(); + const T *vec_data = vec->data(); + const T *dout_data = dout->data(); + + T *dx_data = dx->mutable_data(context.GetPlace()); + T *dvec_data = dvec->mutable_data(context.GetPlace()); + + auto &dev_ctx = context.template device_context(); + auto blas = math::GetBlas(dev_ctx); + + // calculate dx + for (int i = 0; i < m; ++i) { + for (int j = 0; j < n; ++j) + dx_data[i * n + j] = dout_data[i] * vec_data[j]; + } + + dx->Resize(framework::make_ddim({m, n})); + + // calculate dvec + blas.GEMV(true, dim_x[0], dim_x[1], static_cast(1), x_data, dout_data, + static_cast(0), dvec_data); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 661471599cb..1e0dc0e07b4 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -90,6 +90,7 @@ from .tensor.linalg import cholesky #DEFINE_ALIAS # from .tensor.linalg import tensordot #DEFINE_ALIAS from .tensor.linalg import bmm #DEFINE_ALIAS from .tensor.linalg import histogram #DEFINE_ALIAS +from .tensor.linalg import mv #DEFINE_ALIAS from .tensor.logic import equal #DEFINE_ALIAS from .tensor.logic import greater_equal #DEFINE_ALIAS from .tensor.logic import greater_than #DEFINE_ALIAS diff --git a/python/paddle/fluid/tests/unittests/test_mv_op.py 
b/python/paddle/fluid/tests/unittests/test_mv_op.py new file mode 100644 index 00000000000..6b930e59aa5 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_mv_op.py @@ -0,0 +1,94 @@ +#Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +import paddle +import paddle.fluid as fluid +import paddle.fluid.layers as layers +import paddle.fluid.core as core +from op_test import OpTest + + +class TestMVOp(OpTest): + def setUp(self): + self.op_type = "mv" + self.init_config() + self.inputs = {'X': self.x, 'Vec': self.vec} + self.outputs = {'Out': np.dot(self.x, self.vec)} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X', 'Vec'], 'Out') + + def init_config(self): + self.x = np.random.random((5, 100)).astype("float64") + self.vec = np.random.random((100)).astype("float64") + + +class TestMVAPI(unittest.TestCase): + def test_dygraph_api_out(self): + paddle.disable_static() + + self.x_data = np.random.random((5, 100)).astype("float64") + self.x = paddle.to_tensor(self.x_data) + self.vec_data = np.random.random((100)).astype("float64") + self.vec = paddle.to_tensor(self.vec_data) + z = paddle.mv(self.x, self.vec) + np_z = z.numpy() + z_expected = np.array(np.dot(self.x_data, self.vec_data)) + self.assertTrue(np.allclose(np_z, z_expected)) + + paddle.enable_static() + + def 
test_static_graph(self): + paddle.enable_static() + + self.input_x = np.random.rand(5, 100).astype("float64") + self.input_vec = np.random.rand(100).astype("float64") + + data_x = paddle.static.data("x", shape=[5, 100], dtype="float64") + data_vec = paddle.static.data("vec", shape=[100], dtype="float64") + result_vec = paddle.mv(data_x, data_vec) + self.place = paddle.CPUPlace() + exe = paddle.static.Executor(self.place) + res, = exe.run(feed={"x": self.input_x, + "vec": self.input_vec}, + fetch_list=[result_vec]) + z_expected = np.array(np.dot(self.input_x, self.input_vec)) + self.assertTrue(np.allclose(res, z_expected)) + + +class TestMVError(unittest.TestCase): + def test_input(self): + def test_shape(): + paddle.enable_static() + + self.input_x = np.random.rand(5, 100).astype("float64") + self.input_vec = np.random.rand(100).astype("float64") + + data_x = paddle.static.data("x", shape=[5, 100], dtype="float64") + data_vec = paddle.static.data( + "vec", shape=[100, 2], dtype="float64") + result_vec = paddle.mv(data_x, data_vec) + + self.assertRaises(ValueError, test_shape) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/tensor/__init__.py b/python/paddle/tensor/__init__.py index a713663e182..2df9473c4b2 100755 --- a/python/paddle/tensor/__init__.py +++ b/python/paddle/tensor/__init__.py @@ -56,6 +56,7 @@ from .linalg import cholesky #DEFINE_ALIAS # from .linalg import tensordot #DEFINE_ALIAS from .linalg import bmm #DEFINE_ALIAS from .linalg import histogram #DEFINE_ALIAS +from .linalg import mv #DEFINE_ALIAS from .logic import equal #DEFINE_ALIAS from .logic import greater_equal #DEFINE_ALIAS from .logic import greater_than #DEFINE_ALIAS diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py index 67e3ce21ffb..f27cfba487d 100644 --- a/python/paddle/tensor/linalg.py +++ b/python/paddle/tensor/linalg.py @@ -32,7 +32,8 @@ __all__ = [ 'cholesky', # 'tensordot', 'bmm', - 'histogram' + 'histogram', + 'mv' ] @@ -920,3 
+921,64 @@ def histogram(input, bins=100, min=0, max=0): 'min': min, 'max': max}) return out + + +def mv(x, vec, name=None): + """ + Performs a matrix-vector product of the matrix x and the vector vec. + + Args: + x (Variable): A tensor with shape :math:`[M, N]` , The data type of the input Tensor x + should be one of float32, float64. + vec (Variable): A tensor with shape :math:`[N]` , The data type of the input Tensor x + should be one of float32, float64. + name(str, optional): The default value is None. Normally there is no need for user to set this + property. For more information, please refer to :ref:`api_guide_Name`. + + Returns: + Tensor: The tensor which is producted by x and vec. + + Examples: + .. code-block:: python + + # x: [M, N], vec: [N] + # paddle.mv(x, vec) # out: [M] + + import numpy as np + import paddle + + paddle.disable_static() + x_data = np.array([[2, 1, 3], [3, 0, 1]]).astype("float64") + x = paddle.to_tensor(x_data) + vec_data = np.array([3, 5, 1]) + vec = paddle.to_tensor(vec_data).astype("float64") + out = paddle.mv(x, vec) + paddle.enable_static() + """ + if in_dygraph_mode(): + out = core.ops.mv(x, vec) + return out + + def __check_input(x, vec): + var_names = {'x': x, 'vec': vec} + for name, val in var_names.items(): + check_variable_and_dtype(val, name, ['float32', 'float64'], 'mv') + x_shape = list(x.shape) + vec_shape = list(vec.shape) + if len(x_shape) != 2: + raise ValueError( + "x should be 2-dimensional. But received x's dimention: {}". + format(x_shape)) + if len(vec_shape) != 1: + raise ValueError( + "vec should be 1-dimensional. But received vec's dimention: {}". 
+ format(vec_shape)) + + __check_input(x, vec) + + helper = LayerHelper('mv', **locals()) + out = helper.create_variable_for_type_inference(dtype=x.dtype) + helper.append_op( + type='mv', inputs={'X': x, + 'Vec': vec}, outputs={'Out': out}) + return out -- GitLab From 3fe176df350168992dd88d0b8755dd1e70ea9f8f Mon Sep 17 00:00:00 2001 From: guofei <52460041+gfwm2013@users.noreply.github.com> Date: Mon, 21 Sep 2020 23:35:19 +0800 Subject: [PATCH 028/117] Fix test_gast_with_compatibility.py due to the problem of gast in python3.8 (#27433) test=develop --- .../unittests/test_gast_with_compatibility.py | 96 +++++++++++-------- 1 file changed, 55 insertions(+), 41 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_gast_with_compatibility.py b/python/paddle/fluid/tests/unittests/test_gast_with_compatibility.py index c7476a8a742..c176ff09e02 100644 --- a/python/paddle/fluid/tests/unittests/test_gast_with_compatibility.py +++ b/python/paddle/fluid/tests/unittests/test_gast_with_compatibility.py @@ -16,6 +16,7 @@ from __future__ import print_function import ast import gast +import sys import textwrap import unittest @@ -143,47 +144,60 @@ class TestPythonCompatibility(unittest.TestCase): """ self._check_compatibility(source, target) - def test_with(self): - """ - The fileds `context_expr/optional_vars` of `ast.With` in PY2 - is moved into `ast.With.items.withitem` in PY3. 
- """ - source = """ - with guard(): - a = 1 - """ - target = """ - with guard_new(): - a = 1 - """ - self._check_compatibility(source, target) - - def test_subscript_Index(self): - source = """ - x = y()[10] - """ - target = """ - x = y()[20] - """ - self._check_compatibility(source, target) - - def test_subscript_Slice(self): - source = """ - x = y()[10:20] - """ - target = """ - x = y()[20:40] - """ - self._check_compatibility(source, target) - - def test_call(self): - source = """ - y = foo(*arg) - """ - target = """ - y = foo(*arg_new) - """ - self._check_compatibility(source, target) + # The 0.3.3 version of gast has a bug in python3.8 that + # would cause the following tests to fail. But this + # problem doesn't affect the use of Paddle's related + # functions, therefore, the following tests would be + # disable in python3.8. + # + # This problem had been fixed and updated to version + # 0.4.1 of gast. + # + # More information please refer to: + # https://github.com/serge-sans-paille/gast/issues/49 + if sys.version_info < (3, 8): + + def test_with(self): + """ + The fileds `context_expr/optional_vars` of `ast.With` in PY2 + is moved into `ast.With.items.withitem` in PY3. 
+ """ + source = """ + with guard(): + a = 1 + """ + target = """ + with guard_new(): + a = 1 + """ + self._check_compatibility(source, target) + + def test_subscript_Index(self): + source = """ + x = y()[10] + """ + target = """ + x = y()[20] + """ + self._check_compatibility(source, target) + + def test_subscript_Slice(self): + source = """ + x = y()[10:20] + """ + target = """ + x = y()[20:40] + """ + self._check_compatibility(source, target) + + def test_call(self): + source = """ + y = foo(*arg) + """ + target = """ + y = foo(*arg_new) + """ + self._check_compatibility(source, target) if __name__ == '__main__': -- GitLab From 81823370962df89d6ecf8fd73f5a27d31ad2d3de Mon Sep 17 00:00:00 2001 From: Pei Yang Date: Tue, 22 Sep 2020 10:03:58 +0800 Subject: [PATCH 029/117] clear pass logs (#27434) --- paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc | 3 ++- paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc b/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc index 23f794c11c2..9f6032ffa5b 100644 --- a/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc @@ -176,7 +176,8 @@ void BuildRepeatedFCReluPattern(PDPattern* pattern, return false; } if (x->IsVar() && x->Var() && x->Var()->GetShape().size() > 2) { - LOG(WARNING) << "repeated fc relu only supports input dims = 2"; + VLOG(3) << "repeated fc relu only supports input dims = 2, so it " + "is not applied."; return false; } int fc_idx = FindFCIdx(x); diff --git a/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc b/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc index 74ba0093a17..8bdf3940928 100644 --- a/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc +++ b/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc @@ -35,8 +35,6 @@ void ShuffleChannelDetectPass::ApplyImpl(ir::Graph* 
graph) const { const std::string pattern_name = "shufflechannel_pattern"; FusePassBase::Init(pattern_name, graph); - LOG(WARNING) << "There is fluid.layers.shuffle_channel API already, you can " - "use it instead of (reshape + transpose +reshape)"; GraphPatternDetector gpd; auto* x = gpd.mutable_pattern() ->NewNode("x") @@ -85,6 +83,9 @@ void ShuffleChannelDetectPass::ApplyImpl(ir::Graph* graph) const { // Delete the unneeded nodes. GraphSafeRemoveNodes(graph, {reshape1_op, reshape1_out, transpose_op, transpose_out, reshape2_op}); + LOG_FIRST_N(WARNING, 1) + << "There is fluid.layers.shuffle_channel API already, maybe you can " + "use it instead of (reshape + transpose + reshape)"; }; gpd(graph, handler); -- GitLab From afe94903c31c9ca66f23bc54166b160192935e2a Mon Sep 17 00:00:00 2001 From: Wilber Date: Tue, 22 Sep 2020 10:22:12 +0800 Subject: [PATCH 030/117] Rename fluid_inference to paddle_inference. (#27422) --- cmake/inference_lib.cmake | 54 +++++++++---------- go/README_cn.md | 2 +- paddle/fluid/inference/api/demo_ci/run.sh | 2 +- .../api/demo_ci/run_windows_demo.bat | 2 +- paddle/fluid/train/demo/README.md | 4 +- paddle/fluid/train/demo/run.sh | 4 +- paddle/fluid/train/imdb_demo/README.md | 4 +- paddle/scripts/paddle_build.bat | 6 +-- paddle/scripts/paddle_build.sh | 18 +++---- paddle/scripts/windows_build/build.bat | 4 +- 10 files changed, 50 insertions(+), 50 deletions(-) diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index 20f27715e00..e3c2409f103 100644 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -13,11 +13,11 @@ # limitations under the License. 
# make package for paddle fluid shared and static library -set(FLUID_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_install_dir" CACHE STRING - "A path setting fluid shared and static libraries") +set(PADDLE_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_install_dir" CACHE STRING + "A path setting paddle shared and static libraries") -set(FLUID_INFERENCE_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_inference_install_dir" CACHE STRING - "A path setting fluid inference shared and static libraries") +set(PADDLE_INFERENCE_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_install_dir" CACHE STRING + "A path setting paddle inference shared and static libraries") # TODO(zhaolong) # At present, the size of static lib in Windows exceeds the system limit, @@ -142,14 +142,14 @@ set(inference_lib_deps third_party paddle_fluid paddle_fluid_c paddle_fluid_shar add_custom_target(inference_lib_dist DEPENDS ${inference_lib_deps}) -set(dst_dir "${FLUID_INFERENCE_INSTALL_DIR}/third_party/threadpool") +set(dst_dir "${PADDLE_INFERENCE_INSTALL_DIR}/third_party/threadpool") copy(inference_lib_dist SRCS ${THREADPOOL_INCLUDE_DIR}/ThreadPool.h DSTS ${dst_dir}) # Only GPU need cudaErrorMessage.pb IF(WITH_GPU) - set(dst_dir "${FLUID_INFERENCE_INSTALL_DIR}/third_party/cudaerror/data") + set(dst_dir "${PADDLE_INFERENCE_INSTALL_DIR}/third_party/cudaerror/data") copy(inference_lib_dist SRCS ${cudaerror_INCLUDE_DIR} DSTS ${dst_dir}) @@ -158,9 +158,9 @@ ENDIF() # CMakeCache Info copy(inference_lib_dist SRCS ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt - DSTS ${FLUID_INFERENCE_INSTALL_DIR}) + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}) -copy_part_of_thrid_party(inference_lib_dist ${FLUID_INFERENCE_INSTALL_DIR}) +copy_part_of_thrid_party(inference_lib_dist ${PADDLE_INFERENCE_INSTALL_DIR}) set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid") if(WIN32) @@ -177,39 +177,39 @@ endif(WIN32) if(WIN32 AND NOT WITH_STATIC_LIB) copy(inference_lib_dist SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_fluid_lib} - DSTS 
${FLUID_INFERENCE_INSTALL_DIR}/paddle/include ${FLUID_INFERENCE_INSTALL_DIR}/paddle/lib - ${FLUID_INFERENCE_INSTALL_DIR}/paddle/lib) + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib + ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib) else() copy(inference_lib_dist SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_fluid_lib} - DSTS ${FLUID_INFERENCE_INSTALL_DIR}/paddle/include ${FLUID_INFERENCE_INSTALL_DIR}/paddle/lib) + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib) endif() copy(inference_lib_dist SRCS ${CMAKE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h - DSTS ${FLUID_INFERENCE_INSTALL_DIR}/paddle/include/internal) + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/internal) copy(inference_lib_dist SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/framework/io/crypto/cipher.h - DSTS ${FLUID_INFERENCE_INSTALL_DIR}/paddle/include/crypto/) + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/crypto/) include_directories(${CMAKE_BINARY_DIR}/../paddle/fluid/framework/io) # CAPI inference library for only inference -set(FLUID_INFERENCE_C_INSTALL_DIR "${CMAKE_BINARY_DIR}/fluid_inference_c_install_dir" CACHE STRING -"A path setting CAPI fluid inference shared") -copy_part_of_thrid_party(inference_lib_dist ${FLUID_INFERENCE_C_INSTALL_DIR}) +set(PADDLE_INFERENCE_C_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_c_install_dir" CACHE STRING +"A path setting CAPI paddle inference shared") +copy_part_of_thrid_party(inference_lib_dist ${PADDLE_INFERENCE_C_INSTALL_DIR}) set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid") set(paddle_fluid_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi/libpaddle_fluid_c.*) copy(inference_lib_dist SRCS ${src_dir}/inference/capi/paddle_c_api.h ${paddle_fluid_c_lib} - DSTS ${FLUID_INFERENCE_C_INSTALL_DIR}/paddle/include ${FLUID_INFERENCE_C_INSTALL_DIR}/paddle/lib) + DSTS ${PADDLE_INFERENCE_C_INSTALL_DIR}/paddle/include 
${PADDLE_INFERENCE_C_INSTALL_DIR}/paddle/lib) # fluid library for both train and inference set(fluid_lib_deps inference_lib_dist) add_custom_target(fluid_lib_dist ALL DEPENDS ${fluid_lib_deps}) -set(dst_dir "${FLUID_INSTALL_DIR}/paddle/fluid") +set(dst_dir "${PADDLE_INSTALL_DIR}/paddle/fluid") set(module "inference") if(WIN32 AND NOT WITH_STATIC_LIB) copy(fluid_lib_dist @@ -273,22 +273,22 @@ copy(fluid_lib_dist DSTS ${dst_dir}/${module} ) -set(dst_dir "${FLUID_INSTALL_DIR}/third_party/eigen3") +set(dst_dir "${PADDLE_INSTALL_DIR}/third_party/eigen3") copy(inference_lib_dist SRCS ${EIGEN_INCLUDE_DIR}/Eigen/Core ${EIGEN_INCLUDE_DIR}/Eigen/src ${EIGEN_INCLUDE_DIR}/unsupported/Eigen DSTS ${dst_dir}/Eigen ${dst_dir}/Eigen ${dst_dir}/unsupported) -set(dst_dir "${FLUID_INSTALL_DIR}/third_party/boost") +set(dst_dir "${PADDLE_INSTALL_DIR}/third_party/boost") copy(inference_lib_dist SRCS ${BOOST_INCLUDE_DIR}/boost DSTS ${dst_dir}) -set(dst_dir "${FLUID_INSTALL_DIR}/third_party/dlpack") +set(dst_dir "${PADDLE_INSTALL_DIR}/third_party/dlpack") copy(inference_lib_dist SRCS ${DLPACK_INCLUDE_DIR}/dlpack DSTS ${dst_dir}) -set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/zlib") +set(dst_dir "${PADDLE_INSTALL_DIR}/third_party/install/zlib") copy(inference_lib_dist SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES} DSTS ${dst_dir} ${dst_dir}/lib) @@ -296,8 +296,8 @@ copy(inference_lib_dist # CMakeCache Info copy(fluid_lib_dist - SRCS ${FLUID_INFERENCE_INSTALL_DIR}/third_party ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt - DSTS ${FLUID_INSTALL_DIR} ${FLUID_INSTALL_DIR} + SRCS ${PADDLE_INFERENCE_INSTALL_DIR}/third_party ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt + DSTS ${PADDLE_INSTALL_DIR} ${PADDLE_INSTALL_DIR} ) # paddle fluid version @@ -323,6 +323,6 @@ function(version version_file) endif() endfunction() -version(${FLUID_INSTALL_DIR}/version.txt) -version(${FLUID_INFERENCE_INSTALL_DIR}/version.txt) -version(${FLUID_INFERENCE_C_INSTALL_DIR}/version.txt) 
+version(${PADDLE_INSTALL_DIR}/version.txt) +version(${PADDLE_INFERENCE_INSTALL_DIR}/version.txt) +version(${PADDLE_INFERENCE_C_INSTALL_DIR}/version.txt) diff --git a/go/README_cn.md b/go/README_cn.md index 57af05ce0af..8ffc31adf85 100644 --- a/go/README_cn.md +++ b/go/README_cn.md @@ -1,7 +1,7 @@ # Paddle 预测golang API ## 安装 -首先cmake编译时打开`-DON_INFER=ON`,在编译目录下得到``fluid_inference_c_install_dir``,将该目录移动到当前目录中并重命名为`paddle_c` +首先cmake编译时打开`-DON_INFER=ON`,在编译目录下得到``paddle_inference_c_install_dir``,将该目录移动到当前目录中并重命名为`paddle_c` ## 在Go中使用Paddle预测 首先创建预测配置 diff --git a/paddle/fluid/inference/api/demo_ci/run.sh b/paddle/fluid/inference/api/demo_ci/run.sh index d8d9e218781..6b7fb0f619a 100755 --- a/paddle/fluid/inference/api/demo_ci/run.sh +++ b/paddle/fluid/inference/api/demo_ci/run.sh @@ -7,7 +7,7 @@ DATA_DIR=$4 # dataset TENSORRT_INCLUDE_DIR=$5 # TensorRT header file dir, default to /usr/local/TensorRT/include TENSORRT_LIB_DIR=$6 # TensorRT lib file dir, default to /usr/local/TensorRT/lib -inference_install_dir=${PADDLE_ROOT}/build/fluid_inference_install_dir +inference_install_dir=${PADDLE_ROOT}/build/paddle_inference_install_dir cd `dirname $0` current_dir=`pwd` diff --git a/paddle/fluid/inference/api/demo_ci/run_windows_demo.bat b/paddle/fluid/inference/api/demo_ci/run_windows_demo.bat index 5199b83413a..523dafa6649 100644 --- a/paddle/fluid/inference/api/demo_ci/run_windows_demo.bat +++ b/paddle/fluid/inference/api/demo_ci/run_windows_demo.bat @@ -21,7 +21,7 @@ if /i "%use_mkl%"=="N" ( ) :set_paddle_infernece_lib -SET /P paddle_infernece_lib="Please input the path of paddle inference library, such as D:\fluid_inference_install_dir =======>" +SET /P paddle_infernece_lib="Please input the path of paddle inference library, such as D:\paddle_inference_install_dir =======>" set tmp_var=!paddle_infernece_lib! call:remove_space set paddle_infernece_lib=!tmp_var! 
diff --git a/paddle/fluid/train/demo/README.md b/paddle/fluid/train/demo/README.md index bd53ab4b0c0..8a44c25aea9 100644 --- a/paddle/fluid/train/demo/README.md +++ b/paddle/fluid/train/demo/README.md @@ -7,7 +7,7 @@ # WITH_MKLDNN=ON|OFF PADDLE_LIB=/paddle/lib/dir -cmake .. -DFLUID_INSTALL_DIR=$PADDLE_LIB \ +cmake .. -DPADDLE_INSTALL_DIR=$PADDLE_LIB \ -DCMAKE_BUILD_TYPE=Release \ -DWITH_GPU=OFF \ -DWITH_STYLE_CHECK=OFF \ @@ -41,7 +41,7 @@ cd build # WITH_MKLDNN=ON|OFF PADDLE_LIB=/paddle/lib/dir -# PADDLE_LIB is the same with FLUID_INSTALL_DIR when building the lib +# PADDLE_LIB is the same with PADDLE_INSTALL_DIR when building the lib cmake .. -DPADDLE_LIB=$PADDLE_LIB \ -DWITH_MKLDNN=OFF \ -DWITH_MKL=OFF diff --git a/paddle/fluid/train/demo/run.sh b/paddle/fluid/train/demo/run.sh index f7efb3b3b7d..2955e7574da 100755 --- a/paddle/fluid/train/demo/run.sh +++ b/paddle/fluid/train/demo/run.sh @@ -14,12 +14,12 @@ function download() { download # build demo trainer -fluid_install_dir=${PADDLE_ROOT}/build/fluid_install_dir +paddle_install_dir=${PADDLE_ROOT}/build/paddle_install_dir mkdir -p build cd build rm -rf * -cmake .. -DPADDLE_LIB=$fluid_install_dir \ +cmake .. -DPADDLE_LIB=$paddle_install_dir \ -DWITH_MKLDNN=$TURN_ON_MKL \ -DWITH_MKL=$TURN_ON_MKL make diff --git a/paddle/fluid/train/imdb_demo/README.md b/paddle/fluid/train/imdb_demo/README.md index ecc985e13f8..28fd66710f8 100644 --- a/paddle/fluid/train/imdb_demo/README.md +++ b/paddle/fluid/train/imdb_demo/README.md @@ -11,7 +11,7 @@ PADDLE_ROOT=./Paddle cd Paddle mkdir build cd build -cmake -DFLUID_INFERENCE_INSTALL_DIR=$PADDLE_ROOT \ +cmake -DPADDLE_INFERENCE_INSTALL_DIR=$PADDLE_ROOT \ -DCMAKE_BUILD_TYPE=Release \ -DWITH_PYTHON=OFF \ -DWITH_MKL=OFF \ @@ -40,7 +40,7 @@ see: [IMDB Dataset of 50K Movie Reviews | Kaggle](https://www.kaggle.com/lakshmi mkdir build cd build rm -rf * - PADDLE_LIB=path/to/Paddle/build/fluid_install_dir + PADDLE_LIB=path/to/Paddle/build/paddle_install_dir cmake .. 
-DPADDLE_LIB=$PADDLE_LIB -DWITH_MKLDNN=OFF -DWITH_MKL=OFF make ``` diff --git a/paddle/scripts/paddle_build.bat b/paddle/scripts/paddle_build.bat index 9e150763dbb..99450d1e15c 100644 --- a/paddle/scripts/paddle_build.bat +++ b/paddle/scripts/paddle_build.bat @@ -213,10 +213,10 @@ echo ======================================== for /F %%# in ('wmic os get localdatetime^|findstr 20') do set end=%%# set end=%end:~4,10% call :timestamp "%start%" "%end%" "Build" -tree /F %cd%\fluid_inference_install_dir\paddle -%cache_dir%\tools\busybox64.exe du -h -d 0 %cd%\fluid_inference_install_dir\paddle\lib > lib_size.txt +tree /F %cd%\paddle_inference_install_dir\paddle +%cache_dir%\tools\busybox64.exe du -h -d 0 %cd%\paddle_inference_install_dir\paddle\lib > lib_size.txt set /p libsize=< lib_size.txt -for /F %%i in ("%libsize%") do echo "Windows FLuid_Inference Size: %%i" +for /F %%i in ("%libsize%") do echo "Windows Paddle_Inference Size: %%i" %cache_dir%\tools\busybox64.exe du -h -d 0 %cd%\python\dist > whl_size.txt set /p whlsize=< whl_size.txt for /F %%i in ("%whlsize%") do echo "Windows PR whl Size: %%i" diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index ac89116fc49..f87925056ff 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -362,12 +362,12 @@ function build_size() { Calculate /paddle/build size and PR whl size ============================================ EOF - if [ "$1" == "fluid_inference" ]; then + if [ "$1" == "paddle_inference" ]; then cd ${PADDLE_ROOT}/build - cp -r fluid_inference_install_dir fluid_inference - tar -czf fluid_inference.tgz fluid_inference - buildSize=$(du -h --max-depth=0 ${PADDLE_ROOT}/build/fluid_inference.tgz |awk '{print $1}') - echo "FLuid_Inference Size: $buildSize" + cp -r paddle_inference_install_dir paddle_inference + tar -czf paddle_inference.tgz paddle_inference + buildSize=$(du -h --max-depth=0 ${PADDLE_ROOT}/build/paddle_inference.tgz |awk '{print $1}') + echo 
"Paddle_Inference Size: $buildSize" else SYSTEM=`uname -s` if [ "$SYSTEM" == "Darwin" ]; then @@ -1446,7 +1446,7 @@ EOF fi endTime_s=`date +%s` echo "Build Time: $[ $endTime_s - $startTime_s ]s" - build_size "fluid_inference" + build_size "paddle_inference" } function tar_fluid_lib() { @@ -1456,10 +1456,10 @@ function tar_fluid_lib() { ======================================== EOF cd ${PADDLE_ROOT}/build - cp -r fluid_install_dir fluid + cp -r paddle_install_dir fluid tar -czf fluid.tgz fluid - cp -r fluid_inference_install_dir fluid_inference - tar -czf fluid_inference.tgz fluid_inference + cp -r paddle_inference_install_dir paddle_inference + tar -czf paddle_inference.tgz paddle_inference } function test_fluid_lib() { diff --git a/paddle/scripts/windows_build/build.bat b/paddle/scripts/windows_build/build.bat index 65d44877d12..6f99c23ccd2 100644 --- a/paddle/scripts/windows_build/build.bat +++ b/paddle/scripts/windows_build/build.bat @@ -118,8 +118,8 @@ call:Build echo PACKAGE INFERENCE LIBRARY mkdir inference_dist -%PYTHON_DIR%\python.exe -c "import shutil;shutil.make_archive('inference_dist/fluid_inference_install_dir', 'zip', root_dir='fluid_inference_install_dir')" -%PYTHON_DIR%\python.exe -c "import shutil;shutil.make_archive('inference_dist/fluid_install_dir', 'zip', root_dir='fluid_install_dir')" +%PYTHON_DIR%\python.exe -c "import shutil;shutil.make_archive('inference_dist/paddle_inference_install_dir', 'zip', root_dir='paddle_inference_install_dir')" +%PYTHON_DIR%\python.exe -c "import shutil;shutil.make_archive('inference_dist/paddle_install_dir', 'zip', root_dir='paddle_install_dir')" echo BUILD INFERENCE LIBRARY COMPLETE goto :END -- GitLab From f4c750d721a1226738bea382f6c0cf725cca8481 Mon Sep 17 00:00:00 2001 From: Zhong Hui Date: Tue, 22 Sep 2020 10:28:42 +0800 Subject: [PATCH 031/117] Add the cpu version of segment sum mean max min op Add the cpu version of segment sum mean max min op --- paddle/fluid/operators/CMakeLists.txt | 2 +- 
paddle/fluid/operators/math/CMakeLists.txt | 1 + .../fluid/operators/math/segment_pooling.cc | 148 +++++++++++++ paddle/fluid/operators/math/segment_pooling.h | 46 ++++ paddle/fluid/operators/segment_pool_op.cc | 166 ++++++++++++++ paddle/fluid/operators/segment_pool_op.h | 130 +++++++++++ .../fluid/tests/unittests/test_segment_ops.py | 202 ++++++++++++++++++ 7 files changed, 694 insertions(+), 1 deletion(-) create mode 100644 paddle/fluid/operators/math/segment_pooling.cc create mode 100644 paddle/fluid/operators/math/segment_pooling.h create mode 100644 paddle/fluid/operators/segment_pool_op.cc create mode 100644 paddle/fluid/operators/segment_pool_op.h create mode 100644 python/paddle/fluid/tests/unittests/test_segment_ops.py diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index f0a04d850df..53e6f4aa6e4 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -92,7 +92,7 @@ cc_library(common_infer_shape_functions SRCS common_infer_shape_functions.cc DEP set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project -sequence_pooling executor device_memory_aligment generator) +sequence_pooling segment_pooling executor device_memory_aligment generator) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_functor memory jit_kernel_helper concat_and_split cross_entropy softmax vol2col im2col sampler sample_prob tree2col) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions beam_search fc matrix_inverse) diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt index 10d335b828b..24ed4fcf668 100644 --- a/paddle/fluid/operators/math/CMakeLists.txt +++ b/paddle/fluid/operators/math/CMakeLists.txt @@ -76,6 +76,7 @@ 
math_library(prelu) math_library(bert_encoder_functor) math_library(tree2col DEPS math_function) math_library(matrix_inverse) +math_library(segment_pooling) cc_test(math_function_test SRCS math_function_test.cc DEPS math_function) cc_test(selected_rows_functor_test SRCS selected_rows_functor_test.cc DEPS selected_rows_functor) diff --git a/paddle/fluid/operators/math/segment_pooling.cc b/paddle/fluid/operators/math/segment_pooling.cc new file mode 100644 index 00000000000..3c77d3d4cf8 --- /dev/null +++ b/paddle/fluid/operators/math/segment_pooling.cc @@ -0,0 +1,148 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/math/segment_pooling.h" +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +class SegmentPoolFunctor { + public: + void operator()(const platform::CPUDeviceContext& context, + const framework::Tensor& input, + const framework::Tensor& segments, framework::Tensor* output, + framework::Tensor* index, + const std::string pooltype = "SUM") { + const IndexT* segment_ids = segments.data(); + auto curent_id = segment_ids[0]; + int64_t last_idx = 0; + int64_t w = input.numel() / input.dims()[0]; + auto& place = *context.eigen_device(); + for (int64_t idx = 1; idx <= segments.numel(); ++idx) { + if (idx < segments.numel()) { + if (segment_ids[idx] == curent_id) continue; + PADDLE_ENFORCE_GE(segment_ids[idx], curent_id, + platform::errors::InvalidArgument( + "The segment ids should be sorted, but got " + "segment_ids[%d]:%d > segment_ids[%d]:%d.", + idx - 1, curent_id, idx, segment_ids[idx])); + } + + Tensor out_t = output->Slice(curent_id, curent_id + 1); + Tensor in_t = input.Slice(last_idx, idx); + + int64_t h = idx - last_idx; + auto in_e = + framework::EigenMatrix::From(in_t, framework::make_ddim({h, w})); + auto out_e = framework::EigenVector::Flatten(out_t); + + auto reduce_dim = Eigen::array({{0}}); + if (pooltype == "MEAN") { + out_e.device(place) = in_e.mean(reduce_dim); + } else if (pooltype == "SUM") { + out_e.device(place) = in_e.sum(reduce_dim); + } else if (pooltype == "MAX") { + out_e.device(place) = in_e.maximum(reduce_dim); + } else if (pooltype == "MIN") { + out_e.device(place) = in_e.minimum(reduce_dim); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "Unsupported segment pooling type, only MEAN, SUM, MAX, MIN " + "available, but got %s.", + pooltype)); + } + + last_idx = idx; + if (idx < 
segments.numel()) curent_id = segment_ids[idx]; + } + } +}; + +template +class SegmentPoolGradFunctor { + public: + void operator()(const platform::CPUDeviceContext& context, + const framework::Tensor& input, + const framework::Tensor& output, + const framework::Tensor& out_grad, + const framework::Tensor& segments, framework::Tensor* in_grad, + const framework::Tensor* index = nullptr, + const std::string pooltype = "SUM") { + const IndexT* segment_ids = segments.data(); + auto& place = *context.eigen_device(); + auto curent_id = segment_ids[0]; + int64_t last_idx = 0; + int64_t w = in_grad->numel() / in_grad->dims()[0]; + for (int64_t idx = 1; idx <= segments.numel(); ++idx) { + if (idx < segments.numel()) { + if (segment_ids[idx] == curent_id) continue; + PADDLE_ENFORCE_GE(segment_ids[idx], curent_id, + platform::errors::InvalidArgument( + "The segment ids should be sorted, but got " + "segment_ids[%d]:%d > segment_ids[%d]:%d.", + idx - 1, curent_id, idx, segment_ids[idx])); + } + + Tensor out_g_t = out_grad.Slice(curent_id, curent_id + 1); + Tensor in_g_t = in_grad->Slice(last_idx, idx); + + int64_t h = idx - last_idx; + auto in_g_e = framework::EigenMatrix::From(in_g_t, {h, w}); + auto out_g_e = framework::EigenMatrix::From(out_g_t, {1, w}); + Eigen::DSizes bcast(h, 1); + + if (pooltype == "MEAN") { + in_g_e.device(place) = (out_g_e / static_cast(h)).broadcast(bcast); + } else if (pooltype == "SUM") { + in_g_e.device(place) = out_g_e.broadcast(bcast); + } else if (pooltype == "MAX" || pooltype == "MIN") { + Tensor out_t = output.Slice(curent_id, curent_id + 1); + Tensor in_t = input.Slice(last_idx, idx); + auto in_e = framework::EigenMatrix::From(in_t, {h, w}); + auto out_e = framework::EigenMatrix::From(out_t, {1, w}); + in_g_e.device(place) = + (in_e == out_e.broadcast(bcast)).template cast() * + out_g_e.broadcast(bcast); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "Unsupported segment pooling type, only MEAN, SUM, MAX, MIN " + "available, 
but got %s.", + pooltype)); + } + + last_idx = idx; + if (idx < segments.numel()) curent_id = segment_ids[idx]; + } + } +}; + +using CPU = platform::CPUDeviceContext; +template class SegmentPoolFunctor; +template class SegmentPoolFunctor; +template class SegmentPoolFunctor; +template class SegmentPoolFunctor; +template class SegmentPoolGradFunctor; +template class SegmentPoolGradFunctor; +template class SegmentPoolGradFunctor; +template class SegmentPoolGradFunctor; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/math/segment_pooling.h b/paddle/fluid/operators/math/segment_pooling.h new file mode 100644 index 00000000000..561fad6921f --- /dev/null +++ b/paddle/fluid/operators/math/segment_pooling.h @@ -0,0 +1,46 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once +#include +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/platform/device_context.h" + +namespace paddle { +namespace operators { + +template +class SegmentPoolFunctor { + public: + /* mean pool has summed_ids output */ + void operator()(const DeviceContext& context, const framework::Tensor& input, + const framework::Tensor& segments, framework::Tensor* output, + framework::Tensor* summed_ids = nullptr, + const std::string pooltype = "SUM"); +}; + +template +class SegmentPoolGradFunctor { + public: + /* mean pool has summed_ids output */ + void operator()(const DeviceContext& context, const framework::Tensor& input, + const framework::Tensor& output, + const framework::Tensor& out_grad, + const framework::Tensor& segments, framework::Tensor* in_grad, + const framework::Tensor* summed_ids = nullptr, + const std::string pooltype = "SUM"); +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/segment_pool_op.cc b/paddle/fluid/operators/segment_pool_op.cc new file mode 100644 index 00000000000..322cd97f01c --- /dev/null +++ b/paddle/fluid/operators/segment_pool_op.cc @@ -0,0 +1,166 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/segment_pool_op.h" +#include +#include + +namespace paddle { +namespace operators { + +class SegmentPoolOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "SegmentPool"); + OP_INOUT_CHECK(ctx->HasInput("SegmentIds"), "Input", "SegmentIds", + "SegmentPool"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "SegmentPool"); + auto dims = ctx->GetInputDim("X"); + dims[0] = -1; + ctx->SetOutputDim("Out", dims); + + if (ctx->Attrs().Get("pooltype") == "MEAN") { + OP_INOUT_CHECK(ctx->HasOutput("SummedIds"), "Output", "SummedIds", + "SegmentPool"); + ctx->SetOutputDim("SummedIds", {-1, 1}); + } + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), + ctx.device_context()); + } +}; + +class SegmentPoolOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", "(Tensor) The input data of SegmentPoolOp"); + AddInput("SegmentIds", + "(Tensor) 1-D tensor which have the same size with the fist " + "dimension of input X."); + AddOutput("Out", "(Tensor) The output of SegmentPoolOp."); + AddOutput("SummedIds", + "(Tensor) This tensor is used to counts of segment ids for the " + "backward of the mean pool.") + .AsIntermediate(); + AddAttr( + "pooltype", + "(string, default 'SUM') the pooling type of SegmentPoolOp.") + .SetDefault("SUM") + .InEnum({"SUM", "MEAN", "MIN", "MAX"}); + AddComment(R"DOC( +Segment Pool Operator. + +This operator will pool the elements of input `X` which with the same index +in `SegmentIds`. + +For SUM operation, it computes a tensor such that $Out_i = \sum_{j} X_{j}$ +where sum is over j such that `SegmentIds[j] == i`. 
+ +For MEAN operation, it computes a tensor such that +$Out_i = \frac{1}{n_i} \sum_{j} X_{j}$ where sum is over j such that +`SegmentIds[j] == i` and $n_i$ is the number of all index `SegmentIds[j] == i`. + +For MIN operation, it computes a tensor such that $Out_i = \min_{j} X_{j}$ +where min is over j such that `SegmentIds[j] == i`. + +For MAX operation, it computes a tensor such that $Out_i = \max_{j} X_{j}$ +where max is over j such that `SegmentIds[j] == i`. + )DOC"); + } +}; + +class SegmentPoolGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), "Input", + framework::GradVarName("Out"), "SegmentPoolGrad"); + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "SegmentPoolGrad"); + auto og_dims = ctx->GetInputDim(framework::GradVarName("Out")); + auto x_dims = ctx->GetInputDim("X"); + PADDLE_ENFORCE_EQ(og_dims.size(), x_dims.size(), + platform::errors::InvalidArgument( + "The rank of output grad must equal to Input(X). But " + "received: input rank %u, input shape [%s].", + og_dims.size(), og_dims)); + for (int64_t i = 1; i < og_dims.size(); ++i) { + PADDLE_ENFORCE_EQ( + og_dims[i], x_dims[i], + platform::errors::InvalidArgument( + "The dimension mismatch between Input(OUT@GRAD) and " + "Input(X). 
Received Input(OUT@GRAD): input rank %u, " + "input shape [%s]; received Input(X): input rank %u, " + "input shape [%s].", + og_dims.size(), og_dims, x_dims.size(), x_dims)); + } + + ctx->ShareDim("X", /*->*/ framework::GradVarName("X")); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType( + ctx, framework::GradVarName("Out")), + ctx.device_context()); + } +}; + +template +class SegmentPoolGradOpMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op_desc_ptr) const override { + op_desc_ptr->SetType("segment_pool_grad"); + op_desc_ptr->SetInput("X", this->Input("X")); + op_desc_ptr->SetInput("SegmentIds", this->Input("SegmentIds")); + op_desc_ptr->SetInput("Out", this->Output("Out")); + if (BOOST_GET_CONST(std::string, this->GetAttr("pooltype")) == "MEAN") { + op_desc_ptr->SetInput("SummedIds", this->Output("SummedIds")); + } + op_desc_ptr->SetInput(framework::GradVarName("Out"), + this->OutputGrad("Out")); + op_desc_ptr->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); + op_desc_ptr->SetAttrMap(this->Attrs()); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(segment_pool, ops::SegmentPoolOp, ops::SegmentPoolOpMaker, + ops::SegmentPoolGradOpMaker, + ops::SegmentPoolGradOpMaker); +REGISTER_OPERATOR(segment_pool_grad, ops::SegmentPoolGradOp); + +REGISTER_OP_CPU_KERNEL( + segment_pool, + ops::SegmentPoolKernel, + ops::SegmentPoolKernel); + +REGISTER_OP_CPU_KERNEL( + segment_pool_grad, + ops::SegmentPoolGradKernel, + ops::SegmentPoolGradKernel); diff --git a/paddle/fluid/operators/segment_pool_op.h b/paddle/fluid/operators/segment_pool_op.h new file mode 100644 index 00000000000..a505946b9f5 --- /dev/null +++ 
b/paddle/fluid/operators/segment_pool_op.h @@ -0,0 +1,130 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/operators/math/segment_pooling.h" +#include "paddle/fluid/platform/macros.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +void SegmentKernelLaunchHelper(const framework::ExecutionContext& context) { + auto* input = context.Input("X"); + auto* segment = context.Input("SegmentIds"); + auto* output = context.Output("Out"); + std::string pooltype = context.Attr("pooltype"); + Tensor* summed_ids = nullptr; + + int64_t num_indices = segment->numel(); + PADDLE_ENFORCE_EQ( + num_indices, input->dims()[0], + platform::errors::InvalidArgument( + "Segment_ids should be the same size as dimension 0 of input X.")); + PADDLE_ENFORCE_EQ(num_indices, segment->dims()[0], + platform::errors::InvalidArgument( + "Segment_ids should be 1-D tensor, or it's other " + "dimension size is 1. 
Segment_ids's shape is: [%s].", + segment->dims())); + + if (input->numel() == 0 || segment->numel() == 0) { + return; + } + + bool cpu_place = context.GetPlace().type() == typeid(platform::CPUPlace); + if (cpu_place) { + auto dims = input->dims(); + auto* segment_ids = segment->data(); + dims[0] = static_cast(segment_ids[segment->numel() - 1] + 1); + PADDLE_ENFORCE_GT( + dims[0], 0, + platform::errors::InvalidArgument( + "Segment ids must be >= 0, but got last id %d", dims[0])); + output->Resize({dims}); + output->mutable_data(context.GetPlace()); + math::SetConstant set_zero; + auto& dev_ctx = context.template device_context(); + set_zero(dev_ctx, output, static_cast(0)); + } + + SegmentPoolFunctor pool; + + pool(context.template device_context(), *input, *segment, + output, summed_ids, pooltype); +} + +template +class SegmentPoolKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* segment = context.Input("SegmentIds"); + auto index_type = segment->type(); + if (index_type == framework::proto::VarType::INT32) { + SegmentKernelLaunchHelper(context); + } else if (index_type == framework::proto::VarType::INT64) { + SegmentKernelLaunchHelper(context); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "Unsupported index type, Expected int, int64, but got %s.", + index_type)); + } + } +}; + +template +class SegmentPoolGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* input = context.Input("X"); + auto* output = context.Input("Out"); + auto* segment = context.Input("SegmentIds"); + auto* out_g = context.Input(framework::GradVarName("Out")); + auto* in_g = context.Output(framework::GradVarName("X")); + std::string pooltype = context.Attr("pooltype"); + + const Tensor* summed_ids = nullptr; + if (pooltype == "MEAN") { + summed_ids = context.Input("SummedIds"); + } + + 
in_g->mutable_data(context.GetPlace()); + math::SetConstant set_zero; + auto& dev_ctx = context.template device_context(); + set_zero(dev_ctx, in_g, static_cast(0)); + + auto index_type = segment->type(); + if (index_type == framework::proto::VarType::INT32) { + SegmentPoolGradFunctor pool; + pool(context.template device_context(), *input, *output, + *out_g, *segment, in_g, summed_ids, pooltype); + } else if (index_type == framework::proto::VarType::INT64) { + SegmentPoolGradFunctor pool; + pool(context.template device_context(), *input, *output, + *out_g, *segment, in_g, summed_ids, pooltype); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "Unsupported index type, Expected int, int64, but got %s.", + index_type)); + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/python/paddle/fluid/tests/unittests/test_segment_ops.py b/python/paddle/fluid/tests/unittests/test_segment_ops.py new file mode 100644 index 00000000000..b58d66676b0 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_segment_ops.py @@ -0,0 +1,202 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import unittest +import numpy as np +import sys +from op_test import OpTest + + +def compute_segment_sum(x, segment_ids): + length = segment_ids[-1] + 1 + target_shape = list(x.shape) + target_shape[0] = length + results = np.zeros(target_shape, dtype=x.dtype) + for index, ids in enumerate(segment_ids): + results[ids, :] += x[index, :] + return results + + +def compute_segment_mean(x, segment_ids): + length = segment_ids[-1] + 1 + target_shape = list(x.shape) + target_shape[0] = length + results = np.zeros(target_shape, dtype=x.dtype) + count = np.zeros(length, dtype=x.dtype) + 1e-8 + for index, ids in enumerate(segment_ids): + results[ids, :] += x[index, :] + count[ids] += 1 + results = results / count.reshape([-1, 1]) + return results + + +def compute_segment_min_max(x, segment_ids, pooltype="MAX"): + length = segment_ids[-1] + 1 + target_shape = list(x.shape) + target_shape[0] = length + gradient = np.zeros_like(x) + results = np.zeros(target_shape, dtype=x.dtype) + last_idx = 0 + current_id = segment_ids[0] + for idx in range(1, len(segment_ids) + 1): + if idx < len(segment_ids): + if segment_ids[idx] == current_id: + continue + sub_x = x[last_idx:idx, :] + if pooltype == "MAX": + results[current_id] = np.amax(sub_x, axis=0) + elif pooltype == "MIN": + results[current_id] = np.amin(sub_x, axis=0) + else: + raise ValueError("Invalid pooltype, only MAX, MIN supported!") + gradient[last_idx:idx, :][sub_x == results[current_id]] = 1 + last_idx = idx + if idx < len(segment_ids): + current_id = segment_ids[idx] + + return results, gradient / results.size + + +class TestSegmentOps(OpTest): + def set_data(self): + x = np.random.uniform(-1, 1, self.shape).astype(self.dtype) + segment_ids = self.set_segment(len(x), len(x) // 5 + 1) + return x, segment_ids + + def set_segment(self, origin_len, reduce_len): + segment = np.zeros(reduce_len, dtype='int64') + segment = np.random.randint(0, reduce_len, size=[origin_len]) + segment = 
np.sort(segment) + return segment.astype('int64') + + def compute(self, x, segment_ids): + return compute_segment_sum(x, segment_ids) + + def prepare(self): + self.op_type = "segment_pool" + self.dtype = np.float64 + self.shape = [30, 15] + self.attrs = {"pooltype": "SUM"} + + def setUp(self): + self.prepare() + x, segment_ids = self.set_data() + result = self.compute(x, segment_ids) + self.inputs = { + 'X': x.astype(self.dtype), + 'SegmentIds': segment_ids.astype(np.int64) + } + self.outputs = {'Out': result.astype(self.dtype)} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(["X"], "Out") + + +class TestSegmentSum2(TestSegmentOps): + def prepare(self): + super(TestSegmentSum2, self).prepare() + self.shape = [40, 20] + self.dtype = np.float32 + + def setUp(self): + self.prepare() + x, segment_ids = self.set_data() + result = self.compute(x, segment_ids) + self.inputs = { + 'X': x.astype(self.dtype), + 'SegmentIds': segment_ids.astype(np.int32) + } + self.outputs = {'Out': result.astype(self.dtype)} + + +class TestSegmentMax(TestSegmentOps): + def compute(self, x, segment_ids): + return compute_segment_min_max(x, segment_ids, pooltype="MAX") + + def prepare(self): + super(TestSegmentMax, self).prepare() + self.shape = [40, 20] + self.attrs = {'pooltype': "MAX"} + + def setUp(self): + self.prepare() + x, segment_ids = self.set_data() + result, self.gradient = self.compute(x, segment_ids) + self.inputs = { + 'X': x.astype(self.dtype), + 'SegmentIds': segment_ids.astype(np.int32) + } + self.outputs = {'Out': result.astype(self.dtype)} + + def test_check_grad(self): + self.check_grad(["X"], "Out", user_defined_grads=[self.gradient]) + + +class TestSegmentMax2(TestSegmentMax): + def prepare(self): + super(TestSegmentMax2, self).prepare() + self.dtype = np.float32 + + +class TestSegmentMin(TestSegmentMax): + def compute(self, x, segment_ids): + return compute_segment_min_max(x, segment_ids, pooltype="MIN") + + def 
prepare(self): + super(TestSegmentMin, self).prepare() + self.attrs = {'pooltype': "MIN"} + + +class TestSegmentMin2(TestSegmentMin): + def prepare(self): + super(TestSegmentMin2, self).prepare() + self.dtype = np.float32 + + +class TestSegmentMean(TestSegmentOps): + def compute(self, x, segment_ids): + return compute_segment_mean(x, segment_ids) + + def prepare(self): + super(TestSegmentMean, self).prepare() + self.shape = [40, 20] + self.attrs = {'pooltype': "MEAN"} + + def setUp(self): + self.prepare() + x, segment_ids = self.set_data() + result = self.compute(x, segment_ids) + self.inputs = {'X': x, 'SegmentIds': segment_ids} + self.outputs = { + 'Out': result, + 'SummedIds': compute_segment_sum( + np.ones([len(x), 1]).astype(self.dtype), segment_ids) + } + + +class TestSegmentMean2(TestSegmentMean): + def prepare(self): + super(TestSegmentMean2, self).prepare() + self.dtype = np.float32 + self.shape = [30, 20] + self.attrs = {'pooltype': "MEAN"} + + +if __name__ == '__main__': + unittest.main() -- GitLab From 18f2ea66bd8d4c3f9d9f6c59556851316f421e6d Mon Sep 17 00:00:00 2001 From: Kaipeng Deng Date: Tue, 22 Sep 2020 13:19:18 +0800 Subject: [PATCH 032/117] remove paddle.readers API (#26727) * remove paddle.readers API. 
test=develop --- python/paddle/__init__.py | 1 - python/paddle/dataset/flowers.py | 2 +- python/paddle/fluid/io.py | 6 +++--- .../unittests/test_decoupled_py_reader_data_check.py | 2 +- .../test_imperative_star_gan_with_gradient_penalty.py | 2 +- .../unittests/test_multiprocess_reader_exception.py | 3 ++- .../tests/unittests/test_py_reader_combination.py | 5 +++-- python/paddle/io/__init__.py | 10 ---------- python/paddle/reader/__init__.py | 2 +- python/paddle/tensor/__init__.py | 1 - python/paddle/tensor/random.py | 3 --- 11 files changed, 12 insertions(+), 25 deletions(-) diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 1e0dc0e07b4..29e739a0edf 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -204,7 +204,6 @@ from .tensor.math import prod #DEFINE_ALIAS from .tensor.random import standard_normal from .tensor.random import normal from .tensor.random import uniform #DEFINE_ALIAS -from .tensor.random import shuffle #DEFINE_ALIAS from .tensor.random import randn #DEFINE_ALIAS from .tensor.random import rand #DEFINE_ALIAS from .tensor.random import randint #DEFINE_ALIAS diff --git a/python/paddle/dataset/flowers.py b/python/paddle/dataset/flowers.py index 969ad3c922f..bb60c58211c 100644 --- a/python/paddle/dataset/flowers.py +++ b/python/paddle/dataset/flowers.py @@ -37,7 +37,7 @@ from .common import download import tarfile import scipy.io as scio from paddle.dataset.image import * -from paddle.reader import * +from paddle.reader import map_readers, xmap_readers from paddle import compat as cpt import os import numpy as np diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index 6e5f7fd035a..fe5b683bdea 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -26,13 +26,13 @@ from functools import reduce import numpy as np import paddle -import paddle.reader -from paddle.reader import * from paddle.fluid import layers from paddle.fluid.executor import Executor, global_scope from 
paddle.fluid.evaluator import Evaluator from paddle.fluid.framework import Program, Parameter, default_main_program, default_startup_program, Variable, \ program_guard, dygraph_not_support +from paddle.reader import cache, map_readers, buffered, compose, chain, shuffle, \ + ComposeNotAligned, firstn, xmap_readers, multiprocess_reader from .wrapped_decorator import signature_safe_contextmanager from paddle.fluid.compiler import CompiledProgram from paddle.fluid.log_helper import get_logger @@ -62,7 +62,7 @@ __all__ = [ 'set_program_state', 'get_program_parameter', 'get_program_persistable_vars', -] + reader.__all__ + paddle.reader.__all__ +] + reader.__all__ _logger = get_logger( __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') diff --git a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader_data_check.py b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader_data_check.py index 4d767709ef5..b2cb3141aad 100644 --- a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader_data_check.py +++ b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader_data_check.py @@ -37,7 +37,7 @@ class TestClass(unittest.TestCase): low=0, high=9, size=label_shape).astype('int64') yield img, label - reader = fluid.io.cache(fake_reader) + reader = paddle.reader.cache(fake_reader) batch_reader = fluid.io.batch(reader, batch_size=batch_size) places = [fluid.CPUPlace()] diff --git a/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py b/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py index e94157fa047..1ab37aaed23 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_star_gan_with_gradient_penalty.py @@ -592,7 +592,7 @@ class TestStarGANWithGradientPenalty(unittest.TestCase): cfg = Config(place) dataset = create_mnist_dataset(cfg) - dataset = fluid.io.cache(dataset) + 
dataset = paddle.reader.cache(dataset) static_graph_model = StaticGraphTrainModel(cfg) static_loss = [] diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_reader_exception.py b/python/paddle/fluid/tests/unittests/test_multiprocess_reader_exception.py index 39cb6651a4b..9634f5af30a 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_reader_exception.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_reader_exception.py @@ -12,8 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +import paddle import paddle.fluid as fluid -from paddle.fluid.io import multiprocess_reader +from paddle.reader import multiprocess_reader import unittest import numpy as np import six diff --git a/python/paddle/fluid/tests/unittests/test_py_reader_combination.py b/python/paddle/fluid/tests/unittests/test_py_reader_combination.py index 2d977caa033..624927d809f 100644 --- a/python/paddle/fluid/tests/unittests/test_py_reader_combination.py +++ b/python/paddle/fluid/tests/unittests/test_py_reader_combination.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import paddle import paddle.fluid as fluid import unittest import numpy as np @@ -60,8 +61,8 @@ class TestPyReaderCombination(unittest.TestCase): py_reader2 = fluid.io.PyReader( feed_list=[image, label], capacity=16, iterable=True) - reader1 = fluid.io.cache(self.create_reader(self.n1)) - reader2 = fluid.io.cache(self.create_reader(self.n2)) + reader1 = paddle.reader.cache(self.create_reader(self.n1)) + reader2 = paddle.reader.cache(self.create_reader(self.n2)) py_reader1.decorate_batch_generator(reader1, places=place) py_reader2.decorate_batch_generator(reader2, places=place) diff --git a/python/paddle/io/__init__.py b/python/paddle/io/__init__.py index b67779cb2a2..6f0b0f3c9c1 100644 --- a/python/paddle/io/__init__.py +++ b/python/paddle/io/__init__.py @@ -31,15 +31,6 @@ __all__ = [ 'set_program_state', 'load_inference_model', 'save_inference_model', - 'batch', - 'shuffle', - 'buffered', - 'cache', - 'chain', - 'firstn', - 'compose', - 'map_readers', - 'xmap_readers' ] from ..fluid.io import DataLoader @@ -47,4 +38,3 @@ from ..fluid.dataloader import Dataset, IterableDataset, BatchSampler, get_worke TensorDataset, Sampler, SequenceSampler, RandomSampler, DistributedBatchSampler from ..fluid.io import load, save, load_program_state, set_program_state, \ load_inference_model, save_inference_model, batch -from ..reader import shuffle, buffered, cache, chain, firstn, compose, map_readers, xmap_readers diff --git a/python/paddle/reader/__init__.py b/python/paddle/reader/__init__.py index 29337cf0668..881cfd81314 100644 --- a/python/paddle/reader/__init__.py +++ b/python/paddle/reader/__init__.py @@ -66,4 +66,4 @@ An example implementation for multiple item data reader creator: import paddle.reader.decorator from paddle.reader.decorator import * -__all__ = decorator.__all__ +__all__ = [] diff --git a/python/paddle/tensor/__init__.py b/python/paddle/tensor/__init__.py index 2df9473c4b2..cec989fba8b 100755 --- a/python/paddle/tensor/__init__.py +++ 
b/python/paddle/tensor/__init__.py @@ -171,7 +171,6 @@ from .math import prod #DEFINE_ALIAS from .random import standard_normal from .random import normal from .random import uniform #DEFINE_ALIAS -from .random import shuffle #DEFINE_ALIAS from .random import randn #DEFINE_ALIAS from .random import rand #DEFINE_ALIAS from .random import randint #DEFINE_ALIAS diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py index b38a1d0f5b7..9ffd81995ed 100644 --- a/python/paddle/tensor/random.py +++ b/python/paddle/tensor/random.py @@ -21,14 +21,11 @@ from ..fluid.data_feeder import check_variable_and_dtype, check_type, check_dtyp from ..fluid.layers import utils import paddle -from ..fluid.io import shuffle #DEFINE_ALIAS - __all__ = [ 'bernoulli', 'standard_normal', 'normal', 'uniform', - 'shuffle', 'randn', 'rand', 'randint', -- GitLab From b7371fa55dab7013edce0c35401837d4c8ffd571 Mon Sep 17 00:00:00 2001 From: Zhou Wei <52485244+zhouwei25@users.noreply.github.com> Date: Tue, 22 Sep 2020 13:28:44 +0800 Subject: [PATCH 033/117] judge whether remove build dir to accelerate compile,test=develop (#27334) --- paddle/scripts/paddle_build.bat | 58 ++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 15 deletions(-) diff --git a/paddle/scripts/paddle_build.bat b/paddle/scripts/paddle_build.bat index 99450d1e15c..60e4496bc54 100644 --- a/paddle/scripts/paddle_build.bat +++ b/paddle/scripts/paddle_build.bat @@ -20,14 +20,51 @@ rem Paddle CI Task On Windows Platform rem ================================================= rem -------clean up environment----------- -wmic process where name="op_function_generator.exe" call terminate 2>NUL set work_dir=%cd% -mkdir build +wmic process where name="op_function_generator.exe" call terminate 2>NUL + +rem ------initialize common variable------ +if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0" +if not defined BRANCH set BRANCH=develop +if 
not defined WITH_MKL set WITH_MKL=ON +if not defined WITH_GPU set WITH_GPU=OFF +if not defined WITH_AVX set WITH_AVX=ON +if not defined WITH_TESTING set WITH_TESTING=ON +if not defined WITH_PYTHON set WITH_PYTHON=ON +if not defined ON_INFER set ON_INFER=ON +if not defined WITH_INFERENCE_API_TEST set WITH_INFERENCE_API_TEST=ON +if not defined WITH_CACHE set WITH_CACHE=ON +if not defined WITH_TPCACHE set WITH_TPCACHE=ON + +rem -------set cache build work directory----------- +if "%WITH_CACHE%"=="OFF" ( + rmdir build /s/q + goto :mkbuild +) + +for /F %%# in ('wmic os get localdatetime^|findstr 20') do set datetime=%%# +set day_now=%datetime:~6,2% +set day_before=-1 +set /p day_before= day.txt + type day.txt + rmdir build /s/q +) +git diff origin/develop --stat --name-only | findstr "cmake CMakeLists.txt paddle_build.bat" +if %ERRORLEVEL% EQU 0 ( + rmdir build /s/q +) + +:mkbuild +if not exist build ( + mkdir build +) cd /d build -tree . +dir . dir paddle\fluid\pybind\Release -rem ------initialize the virtual environment------ +rem ------initialize the python environment------ if not defined PYTHON_ROOT set PYTHON_ROOT=C:\Python37 set PATH=%PYTHON_ROOT%;%PYTHON_ROOT%\Scripts;%PATH% @@ -38,7 +75,7 @@ rem %PYTHON_EXECUTABLE% -m pip install virtualenv rem %PYTHON_EXECUTABLE% -m virtualenv paddle_winci rem call paddle_winci\Scripts\activate.bat -rem ------pre install requirement---------- +rem ------pre install python requirement---------- where python where pip pip install --upgrade pip --user @@ -62,16 +99,6 @@ set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000 :: set maximum cache size to 20G clcache.exe -M 21474836480 -rem ------initialize common variable------ -if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0" -if not defined BRANCH set BRANCH=develop -if not defined WITH_AVX set WITH_AVX=ON -if not defined WITH_TESTING set WITH_TESTING=ON -if not defined WITH_PYTHON set WITH_PYTHON=ON -if not defined 
ON_INFER set ON_INFER=ON -if not defined WITH_INFERENCE_API_TEST set WITH_INFERENCE_API_TEST=ON -if not defined WITH_TPCACHE set WITH_TPCACHE=ON - rem ------set cache third_party------ set cache_dir=%work_dir:Paddle=cache% dir %cache_dir% @@ -418,6 +445,7 @@ taskkill /f /im rc.exe 2>NUL wmic process where name="op_function_generator.exe" call terminate 2>NUL taskkill /f /im python.exe 2>NUL call paddle_winci\Scripts\deactivate.bat 2>NUL +del %PADDLE_WHL_FILE_WIN% taskkill /f /im python.exe 2>NUL echo Windows CI run successfully! exit /b 0 -- GitLab From dd4c2d86a5f6df51091983b52badcad744548793 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=9F=B3=E6=99=93=E4=BC=9F?= <39303645+Shixiaowei02@users.noreply.github.com> Date: Tue, 22 Sep 2020 13:42:29 +0800 Subject: [PATCH 034/117] enhance error messages, test=develop (#27423) --- paddle/fluid/inference/capi/pd_predictor.cc | 3 +- .../fused/fusion_seqpool_cvm_concat_op.cc | 31 +++++++++++++------ 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/paddle/fluid/inference/capi/pd_predictor.cc b/paddle/fluid/inference/capi/pd_predictor.cc index 0509a619021..31915496893 100644 --- a/paddle/fluid/inference/capi/pd_predictor.cc +++ b/paddle/fluid/inference/capi/pd_predictor.cc @@ -130,7 +130,8 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config, VLOG(3) << "The inputs' size is " << input_names.size(); PADDLE_ENFORCE_EQ( input_names.size(), in_size, - "The number of input and the number of model's input must match. 
"); + paddle::platform::errors::InvalidArgument( + "The number of input and the number of model's input must match.")); for (int i = 0; i < in_size; ++i) { auto input_t = predictor->GetInputTensor(inputs[i].name); std::vector tensor_shape; diff --git a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc index f64e4f134d6..ecb7db46a9d 100644 --- a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc +++ b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc @@ -24,20 +24,27 @@ void FusionSeqPoolCVMConcatOp::InferShape( framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_GE( ctx->Inputs("X").size(), 1UL, - "Inputs(X) of FusionSeqPoolCVMConcatOp should not be empty."); - PADDLE_ENFORCE(ctx->HasOutput("Out"), - "Output(Out) of FusionSeqPoolCVMConcatOp should not be null."); + paddle::platform::errors::InvalidArgument( + "Inputs(X) of FusionSeqPoolCVMConcatOp should not be empty.")); + PADDLE_ENFORCE( + ctx->HasOutput("Out"), + paddle::platform::errors::InvalidArgument( + "Output(Out) of FusionSeqPoolCVMConcatOp should not be null.")); int axis = ctx->Attrs().Get("axis"); PADDLE_ENFORCE_EQ( - axis, 1, "FusionSeqPoolCVMConcatOp only supports concat axis=1 yet."); + axis, 1, + paddle::platform::errors::InvalidArgument( + "FusionSeqPoolCVMConcatOp only supports concat axis=1 yet.")); bool use_cvm = ctx->Attrs().Get("use_cvm"); PADDLE_ENFORCE_EQ( use_cvm, true, - "FusionSeqPoolCVMConcatOp only supports use_cvm is true yet."); + paddle::platform::errors::InvalidArgument( + "FusionSeqPoolCVMConcatOp only supports use_cvm is true yet.")); auto ins_dims = ctx->GetInputsDim("X"); const size_t n = ins_dims.size(); - PADDLE_ENFORCE_GT(n, 0UL, "Input tensors count should > 0."); + PADDLE_ENFORCE_GT(n, 0UL, paddle::platform::errors::InvalidArgument( + "Input tensors count should > 0.")); if (n == 1) { LOG(WARNING) << "Only have one input, may waste memory"; } @@ -45,7 +52,8 @@ void 
FusionSeqPoolCVMConcatOp::InferShape( // The output height should be confirmed in Compute, // since input lod is not accessible here. PADDLE_ENFORCE_EQ(ins_dims[0].size(), 2, - "The dims size of first input should be 2."); + paddle::platform::errors::InvalidArgument( + "The dims size of first input should be 2.")); ctx->SetOutputDim("Out", {-1, ins_dims[0][axis] * static_cast(n)}); } @@ -99,7 +107,8 @@ class FusionSeqPoolCVMConcatKernel : public framework::OpKernel { int w = ins[0]->numel() / x0_dims[0]; PADDLE_ENFORCE_EQ(y_dims[1] % w, 0, - "The output of dims[1] should be dividable of w"); + paddle::platform::errors::InvalidArgument( + "The output of dims[1] should be dividable of w")); jit::seq_pool_attr_t attr(w, jit::SeqPoolType::kSum); if (pooltype == "AVERAGE") { attr.type = jit::SeqPoolType::kAvg; @@ -117,9 +126,11 @@ class FusionSeqPoolCVMConcatKernel : public framework::OpKernel { const T* src = ins[i]->data(); T* dst = y_data + i * w; PADDLE_ENFORCE_EQ(static_cast(ins[i]->numel() / x_dims[0]), w, - "Width of all inputs should be equal."); + paddle::platform::errors::InvalidArgument( + "Width of all inputs should be equal.")); PADDLE_ENFORCE_EQ(x_lod.size(), bs + 1, - "Batchsize of all inputs should be equal."); + paddle::platform::errors::InvalidArgument( + "Batchsize of all inputs should be equal.")); for (size_t j = 0; j < bs; ++j) { attr.h = static_cast(x_lod[j + 1] - x_lod[j]); seqpool(src, dst, &attr); -- GitLab From 7ba6279a2517884622f5fa56da1f91ab61e812f0 Mon Sep 17 00:00:00 2001 From: Aurelius84 Date: Tue, 22 Sep 2020 14:08:03 +0800 Subject: [PATCH 035/117] [Dy2stat] Refine error msg of @to_static if not in imperative mode (#27371) * refine error mesg --- .../dygraph_to_static/program_translator.py | 17 +++++++++++ python/paddle/fluid/dygraph/layers.py | 1 + .../dygraph_to_static/test_declarative.py | 19 ++++++++++++ .../test_program_translator.py | 29 +++++++++++++++++++ 4 files changed, 66 insertions(+) diff --git 
a/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py b/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py index 5218c0aac95..3b3b9bbe96f 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py @@ -370,6 +370,7 @@ class StaticLayer(object): Returns: Traced ConcreteProgram and executable translated Layer. """ + # 1. unify args/kwargs and replace Tensor with InputSpec if len(args) != len(self._function_spec.args_name): args, kwargs = self._function_spec.unified_args_and_kwargs(args, @@ -522,6 +523,19 @@ def _switch_declarative_mode_guard_(is_declarative=True): _in_declarative_mode_ = original_val +def _verify_init_in_dynamic_mode(class_instance): + """ + Verifies the instance is initialized in dynamic mode. + """ + if isinstance(class_instance, layers.Layer): + if not class_instance._init_in_dynamic_mode: + raise RuntimeError( + " `paddle.jit.to_static` is only available in dynamic mode. Please call `paddle.disable_static()` before " + "initializing your Layer class `{}` . Because parameters of Layer class should be initialized firstly " + "in dynamic mode while applying transformation.".format( + class_instance)) + + class ConcreteProgram(object): __slots__ = [ @@ -554,6 +568,9 @@ class ConcreteProgram(object): func_spec(FunctionSpec): A FunctionSpec instance for decorated function. input_spec(list[InputSpec]): """ + # verify the instance is initialized in imperative mode. + _verify_init_in_dynamic_mode(class_instance) + # Transforms dygraph function into static function and caches it. 
dygraph_function = func_spec.dygraph_function static_func = convert_to_static(dygraph_function) diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py index 7075024369f..9c79deaab73 100644 --- a/python/paddle/fluid/dygraph/layers.py +++ b/python/paddle/fluid/dygraph/layers.py @@ -91,6 +91,7 @@ class Layer(core.Layer): self._helper = LayerObjectHelper(self._full_name) self._built = False self._dtype = dtype + self._init_in_dynamic_mode = framework.in_dygraph_mode() self._parameters = collections.OrderedDict() # Buffers the variable (not parameter) created in layer diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py index 5582a65304d..450ef7557bc 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py @@ -358,5 +358,24 @@ class TestDecorateModelDirectly(unittest.TestCase): self.assertListEqual(list(input_shape), [-1, 16, 10]) +class TestErrorWithInitFromStaticMode(unittest.TestCase): + def test_raise_error(self): + # disable imperative + paddle.enable_static() + + net = SimpleNet() + with self.assertRaisesRegexp(RuntimeError, + "only available in dynamic mode"): + net.forward.concrete_program + + with self.assertRaisesRegexp(RuntimeError, + "only available in dynamic mode"): + net.forward.inputs + + with self.assertRaisesRegexp(RuntimeError, + "only available in dynamic mode"): + net.forward.outputs + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py index 873d9ecb535..b0ab55758ee 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py +++ 
b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_program_translator.py @@ -21,6 +21,7 @@ import numpy as np import textwrap import unittest +import paddle import paddle.fluid as fluid from paddle.fluid.dygraph.dygraph_to_static import ProgramTranslator from paddle.fluid.dygraph.jit import declarative @@ -279,5 +280,33 @@ class TestEnableDeclarative(unittest.TestCase): static_output.numpy(), dygraph_output.numpy(), atol=1e-4)) +class Net(fluid.dygraph.layers.Layer): + def __init__(self): + super(Net, self).__init__() + + def forward(self, x): + return x + 1 + + +class TestErrorWithInitFromStaticMode(unittest.TestCase): + def setUp(self): + self.program_translator = ProgramTranslator() + self.x = np.random.randn(10, 32).astype('float32') + + def test_raise_error(self): + # disable imperative + paddle.enable_static() + net = Net() + + self.program_translator.enable(True) + with self.assertRaisesRegexp(RuntimeError, + "only available in dynamic mode"): + self.program_translator.get_output(net.forward, self.x) + + with self.assertRaisesRegexp(RuntimeError, + "only available in dynamic mode"): + self.program_translator.get_program(net.forward, self.x) + + if __name__ == '__main__': unittest.main() -- GitLab From 9f3a9be76ac5b1345acd3c9e5397b7fa96f2db39 Mon Sep 17 00:00:00 2001 From: tianshuo78520a <707759223@qq.com> Date: Tue, 22 Sep 2020 15:00:30 +0800 Subject: [PATCH 036/117] update python 2.7.15 (#27435) --- paddle/scripts/paddle_build.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index f87925056ff..69303013d2a 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -140,18 +140,18 @@ function cmake_base() { if [ "$1" != "" ]; then echo "using python abi: $1" if [ "$1" == "cp27-cp27m" ]; then - export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs2/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs4/lib:} + export 
LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.15-ucs2/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.15-ucs4/lib:} export PATH=/opt/python/cp27-cp27m/bin/:${PATH} PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27m/bin/python -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27m/include/python2.7 - -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs2/lib/libpython2.7.so" + -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.15-ucs2/lib/libpython2.7.so" pip install -r ${PADDLE_ROOT}/python/requirements.txt elif [ "$1" == "cp27-cp27mu" ]; then - export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.11-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.11-ucs2/lib:} + export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.15-ucs4/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.15-ucs2/lib:} export PATH=/opt/python/cp27-cp27mu/bin/:${PATH} PYTHON_FLAGS="-DPYTHON_EXECUTABLE:FILEPATH=/opt/python/cp27-cp27mu/bin/python -DPYTHON_INCLUDE_DIR:PATH=/opt/python/cp27-cp27mu/include/python2.7 - -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.11-ucs4/lib/libpython2.7.so" + -DPYTHON_LIBRARIES:FILEPATH=/opt/_internal/cpython-2.7.15-ucs4/lib/libpython2.7.so" pip install -r ${PADDLE_ROOT}/python/requirements.txt elif [ "$1" == "cp27-cp27m-gcc82" ]; then export LD_LIBRARY_PATH=/opt/_internal/cpython-2.7.15-ucs2/lib:${LD_LIBRARY_PATH#/opt/_internal/cpython-2.7.15-ucs4/lib:} -- GitLab From 905e2346acb72c9bd8c0d955473141bc5e02107e Mon Sep 17 00:00:00 2001 From: danleifeng <52735331+danleifeng@users.noreply.github.com> Date: Tue, 22 Sep 2020 15:23:10 +0800 Subject: [PATCH 037/117] add endpoints log;test=develop (#27439) --- python/paddle/distributed/fleet/launch.py | 5 ++--- python/paddle/distributed/fleet/launch_utils.py | 8 ++++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/python/paddle/distributed/fleet/launch.py b/python/paddle/distributed/fleet/launch.py index 17fa0a0c7c3..d63c9f9184c 100644 --- a/python/paddle/distributed/fleet/launch.py 
+++ b/python/paddle/distributed/fleet/launch.py @@ -463,9 +463,8 @@ def launch(): cuda_device_num = 0 if len(has_ps_args) > 0 or cuda_device_num == 0: - logger.info( - "Run parameter-sever cpu mode. pserver arguments:{}, cuda count:{}". - format(has_ps_args, cuda_device_num)) + logger.info("Run parameter-sever cpu mode. pserver arguments:{}".format( + has_ps_args)) launch_ps(args) elif len(has_collective_args) > 0: logger.info("Run collective gpu mode. gpu arguments:{}, cuda count:{}". diff --git a/python/paddle/distributed/fleet/launch_utils.py b/python/paddle/distributed/fleet/launch_utils.py index 17d3b96cf44..7540cd9f4c1 100644 --- a/python/paddle/distributed/fleet/launch_utils.py +++ b/python/paddle/distributed/fleet/launch_utils.py @@ -435,9 +435,17 @@ def start_local_trainers(cluster, len(pod.trainers), pretty_print_envs(proc_env, ("Distributed Envs", "Value")))) + logger.info( + "details abouts PADDLE_TRAINER_ENDPOINTS can be found in {}/endpoints.log.". + format(log_dir)) fn = None if log_dir is not None: os.system("mkdir -p {}".format(log_dir)) + if os.path.exists("%s/endpoints.log" % log_dir): + os.system("rm -f {}/endpoints.log".format(log_dir)) + with open("%s/endpoints.log" % log_dir, "w") as f: + f.write("PADDLE_TRAINER_ENDPOINTS: \n") + f.write("\n".join(cluster.trainers_endpoints())) fn = open("%s/workerlog.%d" % (log_dir, idx), "a") proc = subprocess.Popen(cmd, env=current_env, stdout=fn, stderr=fn) else: -- GitLab From fda54c0212fac68ec83d95725665703012afa651 Mon Sep 17 00:00:00 2001 From: Pei Yang Date: Tue, 22 Sep 2020 16:43:50 +0800 Subject: [PATCH 038/117] errmsg refine of trt plugin (#27309) --- .../fluid/inference/tensorrt/plugin/trt_plugin_factory.cc | 6 ++++-- paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h | 7 ++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.cc b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.cc index 
3c20b6d1e72..76b0832c546 100644 --- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.cc +++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.cc @@ -25,8 +25,10 @@ PluginTensorRT* PluginFactoryTensorRT::createPlugin(const char* layer_name, const char* plugin_type; DeserializeValue(&serial_data, &serial_length, &plugin_type); - PADDLE_ENFORCE(Has(plugin_type), - "trt plugin type %s does not exists, check it.", plugin_type); + PADDLE_ENFORCE_EQ( + Has(plugin_type), true, + platform::errors::NotFound( + "trt plugin type %s does not exists, check it.", plugin_type)); auto plugin = plugin_registry_[plugin_type](serial_data, serial_length); owned_plugins_.emplace_back(plugin); diff --git a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h index 18037179c7b..6fcb70c6d32 100644 --- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h +++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h @@ -103,7 +103,12 @@ struct Serializer, DeserializeValue(buffer, buffer_size, &size); value->resize(size); size_t nbyte = value->size() * sizeof(T); - PADDLE_ENFORCE_GE(*buffer_size, nbyte); + PADDLE_ENFORCE_GE( + *buffer_size, nbyte, + platform::errors::InvalidArgument("Expect buffer size >= value size in " + "trt plugin deserialization, but got " + "buffer size = %d, value size = %d.", + *buffer_size, nbyte)); std::memcpy(value->data(), *buffer, nbyte); reinterpret_cast(*buffer) += nbyte; *buffer_size -= nbyte; -- GitLab From 0a862fd356c6c7aa78d1dffbcd599ddb3febfeda Mon Sep 17 00:00:00 2001 From: wangchaochaohu Date: Tue, 22 Sep 2020 02:06:37 -0700 Subject: [PATCH 039/117] refine the precious of linspace Op using half way (#27452) --- paddle/fluid/operators/linspace_op.cu | 41 +++++++++++++++++---------- paddle/fluid/operators/linspace_op.h | 8 +++++- python/paddle/fluid/layers/tensor.py | 2 +- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git 
a/paddle/fluid/operators/linspace_op.cu b/paddle/fluid/operators/linspace_op.cu index c51e8785263..a4f06933232 100644 --- a/paddle/fluid/operators/linspace_op.cu +++ b/paddle/fluid/operators/linspace_op.cu @@ -23,9 +23,16 @@ namespace operators { using Tensor = framework::Tensor; template -__global__ void LinspaceKernel(T start, double step, int64_t size, T* out) { - CUDA_KERNEL_LOOP(index, size) { - out[index] = static_cast(start + step * index); +__global__ void LinspaceKernel(T start, T stop, double step, int64_t size, + T* out) { + int64_t index = blockIdx.x * blockDim.x + threadIdx.x; + + for (; index < size; index += blockDim.x * gridDim.x) { + if (index < size / 2) { + out[index] = static_cast(start + step * index); + } else { + out[index] = static_cast(stop - step * (size - index - 1)); + } } } @@ -55,13 +62,15 @@ class CUDALinspaceKernel : public framework::OpKernel { framework::TransDataType(start_dtype, out_dtype, *pre_start, &start_t); framework::TransDataType(stop_dtype, out_dtype, *pre_stop, &stop_t); - framework::Tensor n; - framework::TensorCopy(start_t, platform::CPUPlace(), &n); - T start = n.data()[0]; - framework::TensorCopy(stop_t, platform::CPUPlace(), &n); - T stop = n.data()[0]; - framework::TensorCopy(*num_t, platform::CPUPlace(), &n); - int32_t num = n.data()[0]; + framework::Tensor n_start; + framework::Tensor n_stop; + framework::Tensor n_num; + framework::TensorCopy(start_t, platform::CPUPlace(), &n_start); + T start = n_start.data()[0]; + framework::TensorCopy(stop_t, platform::CPUPlace(), &n_stop); + T stop = n_stop.data()[0]; + framework::TensorCopy(*num_t, platform::CPUPlace(), &n_num); + int64_t num = static_cast(n_num.data()[0]); PADDLE_ENFORCE_GT(num, 0, platform::errors::InvalidArgument( "The num of linspace op should be larger " @@ -72,14 +81,16 @@ class CUDALinspaceKernel : public framework::OpKernel { T* out_data = out->mutable_data(context.GetPlace()); double step = 0; - if (num != 1) { - step = (static_cast(stop - start)) / 
(num - 1); - } - auto stream = context.cuda_device_context().stream(); int block = 512; int grid = (num + block - 1) / block; - LinspaceKernel<<>>(start, step, num, out_data); + if (num != 1) { + step = (static_cast(stop - start)) / (num - 1); + LinspaceKernel<<>>(start, stop, step, num, + out_data); + } else { + LinspaceSpecialKernel<<>>(start, out_data); + } } }; diff --git a/paddle/fluid/operators/linspace_op.h b/paddle/fluid/operators/linspace_op.h index 2c30a66ef8e..d8e0fefe175 100644 --- a/paddle/fluid/operators/linspace_op.h +++ b/paddle/fluid/operators/linspace_op.h @@ -56,9 +56,15 @@ class CPULinspaceKernel : public framework::OpKernel { T* out_data = out->mutable_data(context.GetPlace()); if (num > 1) { + // step should be of double type for all types double step = (static_cast(stop - start)) / (num - 1); + int half_num = num / 2; for (int i = 0; i < num; ++i) { - out_data[i] = static_cast(start + step * i); + if (i < half_num) { + out_data[i] = static_cast(start + step * i); + } else { + out_data[i] = static_cast(stop - step * (num - i - 1)); + } } } else { out_data[0] = static_cast(start); diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 0ce7c098e2d..cf52f3b00fb 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -1424,7 +1424,7 @@ def linspace(start, stop, num, dtype=None, name=None): stop(int|float|Tensor): The input :attr:`stop` is start variable of range. It is a scalar, \ or a Tensor of shape [1] with input data type int32, int64, float32 or float64. num(int|Tensor): The input :attr:`num` is given num of the sequence. It is an int scalar, \ - or a Tensor of shape [1] with data type int32 or int64. + or a Tensor of shape [1] with data type int32. dtype(np.dtype|str, optional): The data type of output tensor, it could be int32, int64, float32 and float64. Default: if None, the data type is float32. 
name(str, optional): Normally there is no need for user to set this property. -- GitLab From 827ac36faa265f7d183c59bd05390915fdceec97 Mon Sep 17 00:00:00 2001 From: pangyoki Date: Tue, 22 Sep 2020 04:50:09 -0500 Subject: [PATCH 040/117] Use dygraph mode by default (#27443) * default open dygraph mode * fix CI-Mac * fix Mac-CI other unittest file * fix CI-Py3 * fix test_communicator_geo and test_buffer_shared_memory_reuse_pass * add enable_static to fix CI-Py3 * add enable_static to fix CI-coverage * delete try except --- python/paddle/__init__.py | 2 ++ python/paddle/fluid/contrib/slim/tests/convert_model2dot.py | 3 +++ .../slim/tests/quant2_int8_image_classification_comparison.py | 2 ++ .../fluid/contrib/slim/tests/quant2_int8_nlp_comparison.py | 2 ++ .../slim/tests/quant_int8_image_classification_comparison.py | 2 ++ python/paddle/fluid/contrib/slim/tests/save_quant_model.py | 2 ++ python/paddle/fluid/contrib/slim/tests/test_graph.py | 2 ++ python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py | 2 ++ .../contrib/slim/tests/test_imperative_qat_channelwise.py | 2 ++ .../slim/tests/test_post_training_quantization_mnist.py | 2 ++ .../slim/tests/test_post_training_quantization_mobilenetv1.py | 2 ++ .../slim/tests/test_post_training_quantization_resnet50.py | 3 +++ .../fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py | 3 +++ .../fluid/contrib/slim/tests/test_quantization_mkldnn_pass.py | 1 + .../paddle/fluid/contrib/slim/tests/test_quantization_pass.py | 2 ++ .../fluid/contrib/slim/tests/test_quantization_scale_pass.py | 2 ++ .../contrib/slim/tests/test_user_defined_quantization.py | 2 ++ .../slim/tests/test_weight_quantization_mobilenetv1.py | 3 +++ python/paddle/fluid/contrib/tests/test_correlation.py | 3 +++ python/paddle/fluid/contrib/tests/test_fp16_utils.py | 3 +++ .../fluid/contrib/tests/test_image_classification_fp16.py | 2 ++ python/paddle/fluid/contrib/tests/test_quantize_transpiler.py | 3 +++ 
python/paddle/fluid/contrib/tests/test_weight_decay_extend.py | 2 ++ python/paddle/fluid/install_check.py | 4 ++++ python/paddle/fluid/tests/book/test_fit_a_line.py | 2 ++ python/paddle/fluid/tests/book/test_image_classification.py | 2 ++ python/paddle/fluid/tests/book/test_label_semantic_roles.py | 2 ++ python/paddle/fluid/tests/book/test_machine_translation.py | 2 ++ python/paddle/fluid/tests/book/test_recognize_digits.py | 2 ++ python/paddle/fluid/tests/book/test_recommender_system.py | 2 ++ python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py | 3 +++ python/paddle/fluid/tests/book/test_word2vec.py | 2 ++ python/paddle/fluid/tests/custom_op/test_custom_op.py | 2 ++ python/paddle/fluid/tests/test_beam_search_decoder.py | 2 ++ python/paddle/fluid/tests/test_data_feeder.py | 3 +++ python/paddle/fluid/tests/test_detection.py | 3 +++ python/paddle/fluid/tests/test_error_clip.py | 1 + python/paddle/fluid/tests/test_if_else_op.py | 2 ++ python/paddle/fluid/tests/test_python_operator_overriding.py | 3 +++ python/paddle/fluid/tests/unittests/c_comm_init_op.py | 3 +++ python/paddle/fluid/tests/unittests/check_nan_inf_base.py | 2 ++ .../paddle/fluid/tests/unittests/collective_allgather_api.py | 2 ++ .../paddle/fluid/tests/unittests/collective_allgather_op.py | 2 ++ .../paddle/fluid/tests/unittests/collective_allreduce_api.py | 2 ++ .../paddle/fluid/tests/unittests/collective_allreduce_op.py | 2 ++ python/paddle/fluid/tests/unittests/collective_barrier_api.py | 2 ++ .../paddle/fluid/tests/unittests/collective_broadcast_api.py | 2 ++ .../paddle/fluid/tests/unittests/collective_broadcast_op.py | 2 ++ python/paddle/fluid/tests/unittests/collective_reduce_api.py | 2 ++ python/paddle/fluid/tests/unittests/collective_reduce_op.py | 2 ++ .../fluid/tests/unittests/collective_reduce_op_calc_stream.py | 2 ++ .../paddle/fluid/tests/unittests/collective_reducescatter.py | 2 ++ .../fluid/tests/unittests/collective_reducescatter_op.py | 2 ++ 
python/paddle/fluid/tests/unittests/collective_scatter_api.py | 2 ++ python/paddle/fluid/tests/unittests/collective_scatter_op.py | 2 ++ python/paddle/fluid/tests/unittests/dist_allreduce_op.py | 2 ++ python/paddle/fluid/tests/unittests/dist_fleet_ctr.py | 2 ++ python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py | 2 ++ python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py | 2 ++ python/paddle/fluid/tests/unittests/dist_mnist.py | 2 ++ python/paddle/fluid/tests/unittests/dist_se_resnext.py | 2 ++ .../fluid/tests/unittests/dygraph_to_static/test_yolov3.py | 2 ++ python/paddle/fluid/tests/unittests/test_allgather.py | 3 +++ python/paddle/fluid/tests/unittests/test_allreduce.py | 3 +++ python/paddle/fluid/tests/unittests/test_auto_checkpoint.py | 1 + python/paddle/fluid/tests/unittests/test_auto_checkpoint1.py | 1 + python/paddle/fluid/tests/unittests/test_auto_checkpoint2.py | 1 + python/paddle/fluid/tests/unittests/test_auto_checkpoint3.py | 1 + .../fluid/tests/unittests/test_auto_checkpoint_dist_basic.py | 1 + .../fluid/tests/unittests/test_auto_checkpoint_multiple.py | 1 + python/paddle/fluid/tests/unittests/test_broadcast.py | 3 +++ .../tests/unittests/test_buffer_shared_memory_reuse_pass.py | 1 + .../fluid/tests/unittests/test_collective_allgather_api.py | 3 +++ .../fluid/tests/unittests/test_collective_allreduce_api.py | 3 +++ .../fluid/tests/unittests/test_collective_barrier_api.py | 3 +++ .../fluid/tests/unittests/test_collective_broadcast_api.py | 3 +++ python/paddle/fluid/tests/unittests/test_collective_reduce.py | 3 +++ .../fluid/tests/unittests/test_collective_reduce_api.py | 3 +++ .../paddle/fluid/tests/unittests/test_collective_scatter.py | 3 +++ .../fluid/tests/unittests/test_collective_scatter_api.py | 3 +++ python/paddle/fluid/tests/unittests/test_communicator_geo.py | 3 +++ .../fluid/tests/unittests/test_communicator_half_async.py | 3 +++ python/paddle/fluid/tests/unittests/test_dist_allreduce_op.py | 3 +++ 
.../tests/unittests/test_dist_fleet_a_sync_optimizer_async.py | 2 ++ .../tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py | 2 ++ .../unittests/test_dist_fleet_a_sync_optimizer_auto_async.py | 2 ++ .../unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py | 2 ++ .../tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py | 2 ++ .../tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py | 2 ++ python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py | 3 +++ .../paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py | 3 +++ .../fluid/tests/unittests/test_dist_fleet_heter_program.py | 3 +++ python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py | 3 +++ python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py | 2 ++ python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py | 3 +++ python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py | 3 +++ python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py | 3 +++ python/paddle/fluid/tests/unittests/test_dist_fleet_simnet.py | 3 +++ .../fluid/tests/unittests/test_dist_mnist_backward_deps.py | 3 +++ .../fluid/tests/unittests/test_dist_mnist_batch_merge.py | 3 +++ .../paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py | 3 +++ .../fluid/tests/unittests/test_dist_mnist_fleet_save.py | 3 +++ .../paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py | 3 +++ .../fluid/tests/unittests/test_dist_mnist_hallreduce.py | 3 +++ .../fluid/tests/unittests/test_dist_mnist_multi_comm.py | 3 +++ python/paddle/fluid/tests/unittests/test_dist_mnist_pg.py | 3 +++ .../fluid/tests/unittests/test_dist_mnist_ring_allreduce.py | 3 +++ .../fluid/tests/unittests/test_dist_mnist_with_program.py | 3 +++ python/paddle/fluid/tests/unittests/test_dist_op.py | 2 ++ .../paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py | 3 +++ .../fluid/tests/unittests/test_dist_transpiler_async_decay.py | 3 +++ .../fluid/tests/unittests/test_dist_transpiler_config.py | 3 +++ .../unittests/test_fleet_graph_execution_meta_optimizer.py | 
2 ++ .../paddle/fluid/tests/unittests/test_listen_and_serv_op.py | 2 ++ python/paddle/fluid/tests/unittests/test_nan_inf.py | 3 +++ python/paddle/fluid/tests/unittests/test_reducescatter.py | 3 +++ python/paddle/fluid/tests/unittests/test_reducescatter_api.py | 3 +++ python/paddle/tests/test_text.py | 2 ++ tools/test_runner.py | 3 +++ 119 files changed, 282 insertions(+) diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 29e739a0edf..e749cf88b6a 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -276,3 +276,5 @@ from .hapi import callbacks from .hapi import summary import paddle.text import paddle.vision + +disable_static() diff --git a/python/paddle/fluid/contrib/slim/tests/convert_model2dot.py b/python/paddle/fluid/contrib/slim/tests/convert_model2dot.py index 877897c0a0e..0018d81dbf2 100644 --- a/python/paddle/fluid/contrib/slim/tests/convert_model2dot.py +++ b/python/paddle/fluid/contrib/slim/tests/convert_model2dot.py @@ -19,6 +19,9 @@ import argparse import paddle.fluid as fluid from paddle.fluid.framework import IrGraph from paddle.fluid import core +import paddle + +paddle.enable_static() def parse_args(): diff --git a/python/paddle/fluid/contrib/slim/tests/quant2_int8_image_classification_comparison.py b/python/paddle/fluid/contrib/slim/tests/quant2_int8_image_classification_comparison.py index 17e0f452e98..3fba0e89218 100644 --- a/python/paddle/fluid/contrib/slim/tests/quant2_int8_image_classification_comparison.py +++ b/python/paddle/fluid/contrib/slim/tests/quant2_int8_image_classification_comparison.py @@ -27,6 +27,8 @@ from paddle.fluid.framework import IrGraph from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass from paddle.fluid import core +paddle.enable_static() + logging.basicConfig(format='%(asctime)s-%(levelname)s: %(message)s') _logger = logging.getLogger(__name__) _logger.setLevel(logging.INFO) diff --git a/python/paddle/fluid/contrib/slim/tests/quant2_int8_nlp_comparison.py 
b/python/paddle/fluid/contrib/slim/tests/quant2_int8_nlp_comparison.py index a534edb7efd..12d1cfcc41d 100644 --- a/python/paddle/fluid/contrib/slim/tests/quant2_int8_nlp_comparison.py +++ b/python/paddle/fluid/contrib/slim/tests/quant2_int8_nlp_comparison.py @@ -25,6 +25,8 @@ from paddle.fluid.framework import IrGraph from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass from paddle.fluid import core +paddle.enable_static() + logging.basicConfig(format='%(asctime)s-%(levelname)s: %(message)s') _logger = logging.getLogger(__name__) _logger.setLevel(logging.INFO) diff --git a/python/paddle/fluid/contrib/slim/tests/quant_int8_image_classification_comparison.py b/python/paddle/fluid/contrib/slim/tests/quant_int8_image_classification_comparison.py index 5f0a8f2d6fa..b81ef7b30ed 100644 --- a/python/paddle/fluid/contrib/slim/tests/quant_int8_image_classification_comparison.py +++ b/python/paddle/fluid/contrib/slim/tests/quant_int8_image_classification_comparison.py @@ -27,6 +27,8 @@ from paddle.fluid.framework import IrGraph from paddle.fluid.contrib.slim.quantization import QuantInt8MkldnnPass from paddle.fluid import core +paddle.enable_static() + logging.basicConfig(format='%(asctime)s-%(levelname)s: %(message)s') _logger = logging.getLogger(__name__) _logger.setLevel(logging.INFO) diff --git a/python/paddle/fluid/contrib/slim/tests/save_quant_model.py b/python/paddle/fluid/contrib/slim/tests/save_quant_model.py index dab4b63cda4..e38148250af 100644 --- a/python/paddle/fluid/contrib/slim/tests/save_quant_model.py +++ b/python/paddle/fluid/contrib/slim/tests/save_quant_model.py @@ -27,6 +27,8 @@ from paddle.fluid.framework import IrGraph from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass from paddle.fluid import core +paddle.enable_static() + def parse_args(): parser = argparse.ArgumentParser() diff --git a/python/paddle/fluid/contrib/slim/tests/test_graph.py b/python/paddle/fluid/contrib/slim/tests/test_graph.py index 
2cf897ec418..435cefd73e7 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_graph.py +++ b/python/paddle/fluid/contrib/slim/tests/test_graph.py @@ -22,6 +22,8 @@ import paddle.fluid as fluid from paddle.fluid.framework import IrGraph from paddle.fluid import core +paddle.enable_static() + os.environ["CUDA_VISIBLE_DEVICES"] = "0" os.environ["CPU_NUM"] = "1" diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py index 0d047a0cd3b..df505cf2435 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py @@ -32,6 +32,8 @@ from paddle.fluid.dygraph.nn import Pool2D from paddle.fluid.dygraph.nn import Linear from paddle.fluid.log_helper import get_logger +paddle.enable_static() + os.environ["CPU_NUM"] = "1" if core.is_compiled_with_cuda(): fluid.set_flags({"FLAGS_cudnn_deterministic": True}) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py index 17c613281a8..80d388ac0da 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py @@ -32,6 +32,8 @@ from paddle.fluid.dygraph.nn import Pool2D from paddle.fluid.dygraph.nn import Linear from paddle.fluid.log_helper import get_logger +paddle.enable_static() + os.environ["CPU_NUM"] = "1" if core.is_compiled_with_cuda(): fluid.set_flags({"FLAGS_cudnn_deterministic": True}) diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py index 3ac1590b8aa..3ea1c84f976 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py +++ 
b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mnist.py @@ -25,6 +25,8 @@ import paddle.fluid as fluid from paddle.dataset.common import download from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization +paddle.enable_static() + random.seed(0) np.random.seed(0) diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py index 864631ec278..18389d9433b 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py +++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_mobilenetv1.py @@ -26,6 +26,8 @@ import paddle.fluid as fluid from paddle.dataset.common import download from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization +paddle.enable_static() + random.seed(0) np.random.seed(0) diff --git a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py index a6c19b5e45a..12b5a2458a4 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py +++ b/python/paddle/fluid/contrib/slim/tests/test_post_training_quantization_resnet50.py @@ -15,6 +15,9 @@ import sys import unittest from test_post_training_quantization_mobilenetv1 import TestPostTrainingQuantization +import paddle + +paddle.enable_static() class TestPostTrainingForResnet50(TestPostTrainingQuantization): diff --git a/python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py index 7b519731314..7f9209c8b3f 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py @@ -18,6 +18,9 @@ import paddle.fluid as fluid import paddle.fluid.core as 
core from paddle.fluid.framework import IrGraph from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass +import paddle + +paddle.enable_static() class TestQuant2Int8MkldnnPass(unittest.TestCase): diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantization_mkldnn_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quantization_mkldnn_pass.py index 3acbd897419..7ee0fd1d3e2 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quantization_mkldnn_pass.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quantization_mkldnn_pass.py @@ -25,6 +25,7 @@ from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass from paddle.fluid.contrib.slim.quantization import QuantInt8MkldnnPass from paddle.fluid import core +paddle.enable_static() os.environ["CPU_NUM"] = "1" diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py index dc9b83e4435..768a9ba7cfc 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quantization_pass.py @@ -27,6 +27,8 @@ from paddle.fluid.contrib.slim.quantization import TransformForMobilePass from paddle.fluid.contrib.slim.quantization import AddQuantDequantPass from paddle.fluid import core +paddle.enable_static() + os.environ["CUDA_VISIBLE_DEVICES"] = "0" os.environ["CPU_NUM"] = "1" diff --git a/python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py index 9e8c5027ebb..b03281546a5 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quantization_scale_pass.py @@ -27,6 +27,8 @@ from paddle.fluid.contrib.slim.quantization import OutScaleForInferencePass from paddle.fluid.contrib.slim.quantization import AddQuantDequantPass from paddle.fluid import core +paddle.enable_static() + 
os.environ["CUDA_VISIBLE_DEVICES"] = "0" os.environ["CPU_NUM"] = "1" diff --git a/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py b/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py index 32292c8a47b..f03d0faa398 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py +++ b/python/paddle/fluid/contrib/slim/tests/test_user_defined_quantization.py @@ -29,6 +29,8 @@ from paddle.fluid.contrib.slim.quantization import AddQuantDequantPass from paddle.fluid import core from paddle.fluid.layer_helper import LayerHelper +paddle.enable_static() + os.environ["CUDA_VISIBLE_DEVICES"] = "0" os.environ["CPU_NUM"] = "1" diff --git a/python/paddle/fluid/contrib/slim/tests/test_weight_quantization_mobilenetv1.py b/python/paddle/fluid/contrib/slim/tests/test_weight_quantization_mobilenetv1.py index ff22b1b61e6..1e8fa51d635 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_weight_quantization_mobilenetv1.py +++ b/python/paddle/fluid/contrib/slim/tests/test_weight_quantization_mobilenetv1.py @@ -17,6 +17,9 @@ import os import time from paddle.dataset.common import download, DATA_HOME from paddle.fluid.contrib.slim.quantization import WeightQuantization +import paddle + +paddle.enable_static() class TestWeightQuantization(unittest.TestCase): diff --git a/python/paddle/fluid/contrib/tests/test_correlation.py b/python/paddle/fluid/contrib/tests/test_correlation.py index 7fcef4dbcd1..50b091415a5 100644 --- a/python/paddle/fluid/contrib/tests/test_correlation.py +++ b/python/paddle/fluid/contrib/tests/test_correlation.py @@ -16,6 +16,9 @@ import unittest import numpy as np import paddle.fluid as fluid from paddle.fluid.dygraph.base import to_variable +import paddle + +paddle.enable_static() def corr(x_1, diff --git a/python/paddle/fluid/contrib/tests/test_fp16_utils.py b/python/paddle/fluid/contrib/tests/test_fp16_utils.py index e286bb0150e..0b51f2dcc86 100644 --- 
a/python/paddle/fluid/contrib/tests/test_fp16_utils.py +++ b/python/paddle/fluid/contrib/tests/test_fp16_utils.py @@ -16,6 +16,9 @@ import unittest import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.contrib.mixed_precision import fp16_utils +import paddle + +paddle.enable_static() class AMPTest(unittest.TestCase): diff --git a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py index 5fb1dba40a3..1bf1a234834 100644 --- a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py +++ b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py @@ -25,6 +25,8 @@ import os import copy import numpy as np +paddle.enable_static() + def resnet_cifar10(input, depth=32): def conv_bn_layer(input, diff --git a/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py b/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py index 77fdf0087b9..342be7db3ed 100644 --- a/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py +++ b/python/paddle/fluid/contrib/tests/test_quantize_transpiler.py @@ -20,6 +20,9 @@ import paddle import paddle.fluid as fluid from paddle.fluid.contrib.quantize.quantize_transpiler import _original_var_name from paddle.fluid.contrib.quantize.quantize_transpiler import QuantizeTranspiler +import paddle + +paddle.enable_static() def linear_fc(num): diff --git a/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py b/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py index a5f08ca969a..906d83fff4f 100644 --- a/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py +++ b/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py @@ -21,6 +21,8 @@ import paddle import paddle.fluid as fluid import contextlib +paddle.enable_static() + def get_places(): places = [fluid.CPUPlace()] diff --git a/python/paddle/fluid/install_check.py b/python/paddle/fluid/install_check.py index 
ef469377acf..51fa1677b86 100644 --- a/python/paddle/fluid/install_check.py +++ b/python/paddle/fluid/install_check.py @@ -62,6 +62,8 @@ def run_check(): # Your Paddle Fluid works well on MUTIPLE GPU or CPU. # Your Paddle Fluid is installed successfully! Let's start deep Learning with Paddle Fluid now """ + paddle.enable_static() + print("Running Verify Fluid Program ... ") device_list = [] @@ -157,3 +159,5 @@ def run_check(): print( "Your Paddle Fluid is installed successfully ONLY for SINGLE GPU or CPU! " "\n Let's start deep Learning with Paddle Fluid now") + + paddle.disable_static() diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py index a7d5a030599..9a2cc4ab1a1 100644 --- a/python/paddle/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/test_fit_a_line.py @@ -23,6 +23,8 @@ import math import sys import os +paddle.enable_static() + def train(use_cuda, save_dirname, is_local): x = fluid.layers.data(name='x', shape=[13], dtype='float32') diff --git a/python/paddle/fluid/tests/book/test_image_classification.py b/python/paddle/fluid/tests/book/test_image_classification.py index 22b74f29228..7c2d5c693a9 100644 --- a/python/paddle/fluid/tests/book/test_image_classification.py +++ b/python/paddle/fluid/tests/book/test_image_classification.py @@ -24,6 +24,8 @@ import unittest import os import numpy as np +paddle.enable_static() + def resnet_cifar10(input, depth=32): def conv_bn_layer(input, diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py index ef14600e644..568d7518a1e 100644 --- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py @@ -25,6 +25,8 @@ import paddle import paddle.dataset.conll05 as conll05 import paddle.fluid as fluid +paddle.enable_static() + word_dict, verb_dict, label_dict = conll05.get_dict() word_dict_len 
= len(word_dict) label_dict_len = len(label_dict) diff --git a/python/paddle/fluid/tests/book/test_machine_translation.py b/python/paddle/fluid/tests/book/test_machine_translation.py index 5e241aaa327..a0056ba3bab 100644 --- a/python/paddle/fluid/tests/book/test_machine_translation.py +++ b/python/paddle/fluid/tests/book/test_machine_translation.py @@ -24,6 +24,8 @@ from paddle.fluid.executor import Executor import unittest import os +paddle.enable_static() + dict_size = 30000 source_dict_dim = target_dict_dim = dict_size hidden_dim = 32 diff --git a/python/paddle/fluid/tests/book/test_recognize_digits.py b/python/paddle/fluid/tests/book/test_recognize_digits.py index 4fbb146752e..71c57b85160 100644 --- a/python/paddle/fluid/tests/book/test_recognize_digits.py +++ b/python/paddle/fluid/tests/book/test_recognize_digits.py @@ -26,6 +26,8 @@ import paddle import paddle.fluid as fluid from paddle.fluid.layers.device import get_places +paddle.enable_static() + BATCH_SIZE = 64 diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py index 433b5498de7..c2ab249f571 100644 --- a/python/paddle/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/fluid/tests/book/test_recommender_system.py @@ -26,6 +26,8 @@ import paddle.fluid.nets as nets from paddle.fluid.executor import Executor from paddle.fluid.optimizer import SGDOptimizer +paddle.enable_static() + IS_SPARSE = True USE_GPU = False BATCH_SIZE = 256 diff --git a/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py b/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py index 0d65513c122..3791e386ecf 100644 --- a/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py +++ b/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py @@ -25,6 +25,9 @@ import math import sys import unittest from paddle.fluid.executor import Executor +import paddle + +paddle.enable_static() dict_size = 30000 source_dict_dim = target_dict_dim = 
dict_size diff --git a/python/paddle/fluid/tests/book/test_word2vec.py b/python/paddle/fluid/tests/book/test_word2vec.py index c919584554b..aae4de70aca 100644 --- a/python/paddle/fluid/tests/book/test_word2vec.py +++ b/python/paddle/fluid/tests/book/test_word2vec.py @@ -23,6 +23,8 @@ import numpy as np import math import sys +paddle.enable_static() + def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True): PASS_NUM = 100 diff --git a/python/paddle/fluid/tests/custom_op/test_custom_op.py b/python/paddle/fluid/tests/custom_op/test_custom_op.py index 0d02da53d66..c9f7d0b7c96 100644 --- a/python/paddle/fluid/tests/custom_op/test_custom_op.py +++ b/python/paddle/fluid/tests/custom_op/test_custom_op.py @@ -21,6 +21,8 @@ import contextlib import paddle import paddle.fluid as fluid +paddle.enable_static() + file_dir = os.path.dirname(os.path.abspath(__file__)) fluid.load_op_library(os.path.join(file_dir, 'librelu2_op.so')) diff --git a/python/paddle/fluid/tests/test_beam_search_decoder.py b/python/paddle/fluid/tests/test_beam_search_decoder.py index fe8a9daa3be..69f3ff46b3a 100644 --- a/python/paddle/fluid/tests/test_beam_search_decoder.py +++ b/python/paddle/fluid/tests/test_beam_search_decoder.py @@ -29,6 +29,8 @@ from paddle.fluid.contrib.decoder.beam_search_decoder import * import unittest import os +paddle.enable_static() + dict_size = 30000 source_dict_dim = target_dict_dim = dict_size src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size) diff --git a/python/paddle/fluid/tests/test_data_feeder.py b/python/paddle/fluid/tests/test_data_feeder.py index 16a33fd3ab3..d50c57e670b 100644 --- a/python/paddle/fluid/tests/test_data_feeder.py +++ b/python/paddle/fluid/tests/test_data_feeder.py @@ -16,6 +16,9 @@ from __future__ import print_function import paddle.fluid as fluid import unittest +import paddle + +paddle.enable_static() class TestDataFeeder(unittest.TestCase): diff --git a/python/paddle/fluid/tests/test_detection.py 
b/python/paddle/fluid/tests/test_detection.py index 425c4e3c7e3..05b9067ec40 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -24,6 +24,9 @@ import numpy as np from unittests.test_imperative_base import new_program_scope from paddle.fluid.dygraph import base from paddle.fluid import core +import paddle + +paddle.enable_static() class LayerTest(unittest.TestCase): diff --git a/python/paddle/fluid/tests/test_error_clip.py b/python/paddle/fluid/tests/test_error_clip.py index 3c977afc7c8..7859fca15f6 100644 --- a/python/paddle/fluid/tests/test_error_clip.py +++ b/python/paddle/fluid/tests/test_error_clip.py @@ -22,6 +22,7 @@ BATCH_SIZE = 128 CLIP_MAX = 2e-6 CLIP_MIN = -1e-6 +paddle.enable_static() prog = fluid.framework.Program() with fluid.program_guard(main_program=prog): diff --git a/python/paddle/fluid/tests/test_if_else_op.py b/python/paddle/fluid/tests/test_if_else_op.py index 1c992b9d8cd..b7792e5ce27 100644 --- a/python/paddle/fluid/tests/test_if_else_op.py +++ b/python/paddle/fluid/tests/test_if_else_op.py @@ -28,6 +28,8 @@ from paddle.fluid.layers.control_flow import ConditionalBlock import unittest import numpy as np +paddle.enable_static() + class TestMNISTIfElseOp(unittest.TestCase): # FIXME: https://github.com/PaddlePaddle/Paddle/issues/12245#issuecomment-406462379 diff --git a/python/paddle/fluid/tests/test_python_operator_overriding.py b/python/paddle/fluid/tests/test_python_operator_overriding.py index 5f92c437ec7..fd9dc961988 100644 --- a/python/paddle/fluid/tests/test_python_operator_overriding.py +++ b/python/paddle/fluid/tests/test_python_operator_overriding.py @@ -21,6 +21,9 @@ import numpy as np import paddle.fluid.layers as layers import paddle.fluid.framework as framework import paddle.fluid as fluid +import paddle + +paddle.enable_static() class TestPythonOperatorOverride(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/c_comm_init_op.py 
b/python/paddle/fluid/tests/unittests/c_comm_init_op.py index db77477cca6..ed6a75230c6 100644 --- a/python/paddle/fluid/tests/unittests/c_comm_init_op.py +++ b/python/paddle/fluid/tests/unittests/c_comm_init_op.py @@ -19,6 +19,9 @@ import os import paddle.fluid.core as core import paddle.fluid as fluid from paddle.distributed.fleet.base.private_helper_function import wait_server_ready +import paddle + +paddle.enable_static() class TestCCommInitOp(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py index 8e75b3c3438..c682c795019 100644 --- a/python/paddle/fluid/tests/unittests/check_nan_inf_base.py +++ b/python/paddle/fluid/tests/unittests/check_nan_inf_base.py @@ -28,6 +28,8 @@ import paddle import paddle.fluid as fluid import paddle.compat as cpt +paddle.enable_static() + np.random.seed(0) diff --git a/python/paddle/fluid/tests/unittests/collective_allgather_api.py b/python/paddle/fluid/tests/unittests/collective_allgather_api.py index bdf4ca07ae9..63d7f52c11a 100644 --- a/python/paddle/fluid/tests/unittests/collective_allgather_api.py +++ b/python/paddle/fluid/tests/unittests/collective_allgather_api.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveAllgatherAPI(TestCollectiveAPIRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_allgather_op.py b/python/paddle/fluid/tests/unittests/collective_allgather_op.py index 34999654768..f77a97aa915 100644 --- a/python/paddle/fluid/tests/unittests/collective_allgather_op.py +++ b/python/paddle/fluid/tests/unittests/collective_allgather_op.py @@ -34,6 +34,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_base import TestCollectiveRunnerBase, runtime_main +paddle.enable_static() + class 
TestCollectiveAllGather(TestCollectiveRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_allreduce_api.py b/python/paddle/fluid/tests/unittests/collective_allreduce_api.py index aea429ae5e3..67242b274fc 100644 --- a/python/paddle/fluid/tests/unittests/collective_allreduce_api.py +++ b/python/paddle/fluid/tests/unittests/collective_allreduce_api.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveAllreduceAPI(TestCollectiveAPIRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_allreduce_op.py b/python/paddle/fluid/tests/unittests/collective_allreduce_op.py index 9aef8879cab..eef59ee3dde 100644 --- a/python/paddle/fluid/tests/unittests/collective_allreduce_op.py +++ b/python/paddle/fluid/tests/unittests/collective_allreduce_op.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_base import TestCollectiveRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveAllreduce(TestCollectiveRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_barrier_api.py b/python/paddle/fluid/tests/unittests/collective_barrier_api.py index 09b3c27126d..dbcc70d540b 100644 --- a/python/paddle/fluid/tests/unittests/collective_barrier_api.py +++ b/python/paddle/fluid/tests/unittests/collective_barrier_api.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveBarrierAPI(TestCollectiveAPIRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_broadcast_api.py b/python/paddle/fluid/tests/unittests/collective_broadcast_api.py index a879a027b50..08a3d948906 
100644 --- a/python/paddle/fluid/tests/unittests/collective_broadcast_api.py +++ b/python/paddle/fluid/tests/unittests/collective_broadcast_api.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveBroadcastAPI(TestCollectiveAPIRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_broadcast_op.py b/python/paddle/fluid/tests/unittests/collective_broadcast_op.py index 18f0485f923..127f48be618 100644 --- a/python/paddle/fluid/tests/unittests/collective_broadcast_op.py +++ b/python/paddle/fluid/tests/unittests/collective_broadcast_op.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_base import TestCollectiveRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveBroadcast(TestCollectiveRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_reduce_api.py b/python/paddle/fluid/tests/unittests/collective_reduce_api.py index 3e89b1cb3ee..41e31146a22 100644 --- a/python/paddle/fluid/tests/unittests/collective_reduce_api.py +++ b/python/paddle/fluid/tests/unittests/collective_reduce_api.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveReduceAPI(TestCollectiveAPIRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_reduce_op.py b/python/paddle/fluid/tests/unittests/collective_reduce_op.py index da61284344b..0448c66d132 100644 --- a/python/paddle/fluid/tests/unittests/collective_reduce_op.py +++ b/python/paddle/fluid/tests/unittests/collective_reduce_op.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_base import 
TestCollectiveRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveReduce(TestCollectiveRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_reduce_op_calc_stream.py b/python/paddle/fluid/tests/unittests/collective_reduce_op_calc_stream.py index 7e690428623..7a9e0b148d5 100644 --- a/python/paddle/fluid/tests/unittests/collective_reduce_op_calc_stream.py +++ b/python/paddle/fluid/tests/unittests/collective_reduce_op_calc_stream.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_base import TestCollectiveRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveReduce(TestCollectiveRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_reducescatter.py b/python/paddle/fluid/tests/unittests/collective_reducescatter.py index 2f14277ae1e..8b989c73d4d 100644 --- a/python/paddle/fluid/tests/unittests/collective_reducescatter.py +++ b/python/paddle/fluid/tests/unittests/collective_reducescatter.py @@ -34,6 +34,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_base import TestCollectiveRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveReduceScatter(TestCollectiveRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_reducescatter_op.py b/python/paddle/fluid/tests/unittests/collective_reducescatter_op.py index 3e286d7f43d..91712e2b50f 100644 --- a/python/paddle/fluid/tests/unittests/collective_reducescatter_op.py +++ b/python/paddle/fluid/tests/unittests/collective_reducescatter_op.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_base import TestCollectiveRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveReduceScatter(TestCollectiveRunnerBase): def __init__(self): diff --git 
a/python/paddle/fluid/tests/unittests/collective_scatter_api.py b/python/paddle/fluid/tests/unittests/collective_scatter_api.py index f68929ad3b3..ca36c8c83a5 100644 --- a/python/paddle/fluid/tests/unittests/collective_scatter_api.py +++ b/python/paddle/fluid/tests/unittests/collective_scatter_api.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_api_base import TestCollectiveAPIRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveScatterAPI(TestCollectiveAPIRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/collective_scatter_op.py b/python/paddle/fluid/tests/unittests/collective_scatter_op.py index efe5e17bcce..7afa4aec639 100644 --- a/python/paddle/fluid/tests/unittests/collective_scatter_op.py +++ b/python/paddle/fluid/tests/unittests/collective_scatter_op.py @@ -35,6 +35,8 @@ import paddle.fluid.layers as layers from functools import reduce from test_collective_base import TestCollectiveRunnerBase, runtime_main +paddle.enable_static() + class TestCollectiveScatter(TestCollectiveRunnerBase): def __init__(self): diff --git a/python/paddle/fluid/tests/unittests/dist_allreduce_op.py b/python/paddle/fluid/tests/unittests/dist_allreduce_op.py index 88a3cd14c43..de52072d4a8 100644 --- a/python/paddle/fluid/tests/unittests/dist_allreduce_op.py +++ b/python/paddle/fluid/tests/unittests/dist_allreduce_op.py @@ -30,6 +30,8 @@ import signal from functools import reduce from test_dist_base import TestDistRunnerBase, runtime_main +paddle.enable_static() + DTYPE = "float32" paddle.dataset.mnist.fetch() diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py index 1b0ce0c03e7..8277499fcce 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py @@ -30,6 +30,8 @@ import ctr_dataset_reader from test_dist_fleet_base import runtime_main, 
FleetDistRunnerBase from paddle.distributed.fleet.base.util_factory import fleet_util +paddle.enable_static() + # Fix seed for test fluid.default_startup_program().random_seed = 1 fluid.default_main_program().random_seed = 1 diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py index a5633bb0450..2f938a813d8 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py @@ -31,6 +31,8 @@ from test_dist_fleet_heter_base import runtime_main, FleetDistHeterRunnerBase from dist_fleet_ctr import TestDistCTR2x2, fake_ctr_reader from paddle.distributed.fleet.base.util_factory import fleet_util +paddle.enable_static() + # Fix seed for test fluid.default_startup_program().random_seed = 1 fluid.default_main_program().random_seed = 1 diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py index 7d5ca4fc6e3..2ea69e1b676 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py @@ -34,6 +34,8 @@ from functools import reduce from test_dist_fleet_base import runtime_main, FleetDistRunnerBase from paddle.distributed.fleet.base.util_factory import fleet_util +paddle.enable_static() + DTYPE = "int64" DATA_URL = 'http://paddle-dist-ce-data.bj.bcebos.com/simnet.train.1000' DATA_MD5 = '24e49366eb0611c552667989de2f57d5' diff --git a/python/paddle/fluid/tests/unittests/dist_mnist.py b/python/paddle/fluid/tests/unittests/dist_mnist.py index 20e89bd46c6..f63139464e7 100644 --- a/python/paddle/fluid/tests/unittests/dist_mnist.py +++ b/python/paddle/fluid/tests/unittests/dist_mnist.py @@ -31,6 +31,8 @@ from functools import reduce from test_dist_base import TestDistRunnerBase, runtime_main from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy 
+paddle.enable_static() + DTYPE = "float32" paddle.dataset.mnist.fetch() diff --git a/python/paddle/fluid/tests/unittests/dist_se_resnext.py b/python/paddle/fluid/tests/unittests/dist_se_resnext.py index a2fd61e2387..5ba40c7c838 100644 --- a/python/paddle/fluid/tests/unittests/dist_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/dist_se_resnext.py @@ -30,6 +30,8 @@ import sys import signal from test_dist_base import TestDistRunnerBase, runtime_main +paddle.enable_static() + # Fix seed for test fluid.default_startup_program().random_seed = 1 fluid.default_main_program().random_seed = 1 diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_yolov3.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_yolov3.py index 4453dff892f..6aa9156a0d4 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_yolov3.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_yolov3.py @@ -17,12 +17,14 @@ import random import time import unittest +import paddle import paddle.fluid as fluid from paddle.fluid.dygraph import ProgramTranslator from paddle.fluid.dygraph import to_variable from yolov3 import cfg, YOLOv3 +paddle.enable_static() random.seed(0) np.random.seed(0) diff --git a/python/paddle/fluid/tests/unittests/test_allgather.py b/python/paddle/fluid/tests/unittests/test_allgather.py index 877ae6f6e16..9bb34d3db43 100644 --- a/python/paddle/fluid/tests/unittests/test_allgather.py +++ b/python/paddle/fluid/tests/unittests/test_allgather.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle from test_collective_base import TestDistBase +paddle.enable_static() + class TestAllGatherOp(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_allreduce.py b/python/paddle/fluid/tests/unittests/test_allreduce.py index e0b6422a67b..660f559535c 100644 --- a/python/paddle/fluid/tests/unittests/test_allreduce.py +++ 
b/python/paddle/fluid/tests/unittests/test_allreduce.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle from test_collective_base import TestDistBase +paddle.enable_static() + class TestAllReduceOp(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint.py index fd009db5fd0..3f33120d1f7 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_checkpoint.py +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint.py @@ -31,6 +31,7 @@ from paddle.io import Dataset, BatchSampler, DataLoader from paddle.fluid.tests.unittests.auto_checkpoint_utils import AutoCheckpointBase, get_logger +paddle.enable_static() logger = get_logger() diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint1.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint1.py index 55173325f62..fca1baf85e5 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_checkpoint1.py +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint1.py @@ -32,6 +32,7 @@ from paddle.io import Dataset, BatchSampler, DataLoader from paddle.fluid.tests.unittests.auto_checkpoint_utils import AutoCheckpointBase, get_logger from paddle.fluid.tests.unittests.test_auto_checkpoint import AutoCheckPointACLBase +paddle.enable_static() logger = get_logger() diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint2.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint2.py index 5d72fa01008..0c17807a689 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_checkpoint2.py +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint2.py @@ -32,6 +32,7 @@ from paddle.io import Dataset, BatchSampler, DataLoader from paddle.fluid.tests.unittests.auto_checkpoint_utils import AutoCheckpointBase, get_logger from paddle.fluid.tests.unittests.test_auto_checkpoint import AutoCheckPointACLBase 
+paddle.enable_static() logger = get_logger() diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint3.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint3.py index 5382f7e328e..ca103be59b9 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_checkpoint3.py +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint3.py @@ -32,6 +32,7 @@ from paddle.io import Dataset, BatchSampler, DataLoader from paddle.fluid.tests.unittests.auto_checkpoint_utils import AutoCheckpointBase, get_logger from paddle.fluid.tests.unittests.test_auto_checkpoint import AutoCheckPointACLBase +paddle.enable_static() logger = get_logger() diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint_dist_basic.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint_dist_basic.py index 3c78438bdf6..3eeff91ff2d 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_checkpoint_dist_basic.py +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint_dist_basic.py @@ -32,6 +32,7 @@ from paddle.io import Dataset, BatchSampler, DataLoader from paddle.fluid.tests.unittests.auto_checkpoint_utils import AutoCheckpointBase, get_logger from paddle.fluid.tests.unittests.test_auto_checkpoint import AutoCheckPointACLBase +paddle.enable_static() logger = get_logger() diff --git a/python/paddle/fluid/tests/unittests/test_auto_checkpoint_multiple.py b/python/paddle/fluid/tests/unittests/test_auto_checkpoint_multiple.py index 8c10cd0e992..f8c12f89051 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_checkpoint_multiple.py +++ b/python/paddle/fluid/tests/unittests/test_auto_checkpoint_multiple.py @@ -32,6 +32,7 @@ from paddle.io import Dataset, BatchSampler, DataLoader from paddle.fluid.tests.unittests.auto_checkpoint_utils import AutoCheckpointBase, get_logger from paddle.fluid.tests.unittests.test_auto_checkpoint import AutoCheckPointACLBase +paddle.enable_static() logger = get_logger() diff --git 
a/python/paddle/fluid/tests/unittests/test_broadcast.py b/python/paddle/fluid/tests/unittests/test_broadcast.py index 029e881d6f6..8b8cdb1235c 100644 --- a/python/paddle/fluid/tests/unittests/test_broadcast.py +++ b/python/paddle/fluid/tests/unittests/test_broadcast.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle from test_collective_base import TestDistBase +paddle.enable_static() + class TestCBroadcastOp(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass.py b/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass.py index 43d485a0a6d..2c9168df472 100644 --- a/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass.py +++ b/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass.py @@ -36,6 +36,7 @@ class InplaceTestBase(unittest.TestCase): self.fuse_all_optimizer_ops = False def setUp(self): + paddle.enable_static() self.initParameter() if self.use_cuda and fluid.core.is_compiled_with_cuda(): self.device_count = fluid.core.get_cuda_device_count() diff --git a/python/paddle/fluid/tests/unittests/test_collective_allgather_api.py b/python/paddle/fluid/tests/unittests/test_collective_allgather_api.py index 71777df4651..dbf77fafcc4 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_allgather_api.py +++ b/python/paddle/fluid/tests/unittests/test_collective_allgather_api.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle from test_collective_api_base import TestDistBase +paddle.enable_static() + class TestCollectiveAllgatherAPI(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_collective_allreduce_api.py b/python/paddle/fluid/tests/unittests/test_collective_allreduce_api.py index 24dd7cacff6..a405da80ada 100644 --- 
a/python/paddle/fluid/tests/unittests/test_collective_allreduce_api.py +++ b/python/paddle/fluid/tests/unittests/test_collective_allreduce_api.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle from test_collective_api_base import TestDistBase +paddle.enable_static() + class TestCollectiveAllreduceAPI(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_collective_barrier_api.py b/python/paddle/fluid/tests/unittests/test_collective_barrier_api.py index ebf86f6ae14..d0a67baa61e 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_barrier_api.py +++ b/python/paddle/fluid/tests/unittests/test_collective_barrier_api.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle from test_collective_api_base import TestDistBase +paddle.enable_static() + class TestCollectiveBarrierAPI(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_collective_broadcast_api.py b/python/paddle/fluid/tests/unittests/test_collective_broadcast_api.py index b1cf4f1ac4c..702e0431157 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_broadcast_api.py +++ b/python/paddle/fluid/tests/unittests/test_collective_broadcast_api.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle from test_collective_api_base import TestDistBase +paddle.enable_static() + class TestCollectiveBroadcastAPI(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_collective_reduce.py b/python/paddle/fluid/tests/unittests/test_collective_reduce.py index 36837d6a227..c0627467428 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_reduce.py +++ b/python/paddle/fluid/tests/unittests/test_collective_reduce.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import 
paddle from test_collective_base import TestDistBase +paddle.enable_static() + class TestCReduceOp(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_collective_reduce_api.py b/python/paddle/fluid/tests/unittests/test_collective_reduce_api.py index bf3975f3fc1..8d28c794f02 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_reduce_api.py +++ b/python/paddle/fluid/tests/unittests/test_collective_reduce_api.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle from test_collective_api_base import TestDistBase +paddle.enable_static() + class TestCollectiveReduceAPI(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_collective_scatter.py b/python/paddle/fluid/tests/unittests/test_collective_scatter.py index 7fe3ce73359..ea34d1cab5a 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_scatter.py +++ b/python/paddle/fluid/tests/unittests/test_collective_scatter.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle from test_collective_base import TestDistBase +paddle.enable_static() + class TestCScatterOp(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_collective_scatter_api.py b/python/paddle/fluid/tests/unittests/test_collective_scatter_api.py index cae842b3961..3a37da52b8e 100644 --- a/python/paddle/fluid/tests/unittests/test_collective_scatter_api.py +++ b/python/paddle/fluid/tests/unittests/test_collective_scatter_api.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle from test_collective_api_base import TestDistBase +paddle.enable_static() + class TestCollectiveScatterAPI(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_communicator_geo.py 
b/python/paddle/fluid/tests/unittests/test_communicator_geo.py index 30207340a27..d9fc9262b31 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_geo.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_geo.py @@ -28,6 +28,8 @@ import paddle.fluid as fluid import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet as fleet +paddle.enable_static() + class TestCommunicatorGeoEnd2End(unittest.TestCase): def net(self): @@ -140,6 +142,7 @@ import paddle.distributed.fleet as fleet from test_communicator_geo import TestCommunicatorGeoEnd2End +paddle.enable_static() class RunServer(TestCommunicatorGeoEnd2End): def runTest(self): diff --git a/python/paddle/fluid/tests/unittests/test_communicator_half_async.py b/python/paddle/fluid/tests/unittests/test_communicator_half_async.py index 542d1874179..391588780f3 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_half_async.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_half_async.py @@ -29,6 +29,8 @@ import paddle.fluid.incubate.fleet.base.role_maker as role_maker from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory +paddle.enable_static() + class TestCommunicatorHalfAsyncEnd2End(unittest.TestCase): def net(self): @@ -120,6 +122,7 @@ from test_communicator_half_async import TestCommunicatorHalfAsyncEnd2End from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory +paddle.enable_static() class RunServer(TestCommunicatorHalfAsyncEnd2End): def runTest(self): diff --git a/python/paddle/fluid/tests/unittests/test_dist_allreduce_op.py b/python/paddle/fluid/tests/unittests/test_dist_allreduce_op.py index fbeff20c63b..2adf6e41931 100644 --- 
a/python/paddle/fluid/tests/unittests/test_dist_allreduce_op.py +++ b/python/paddle/fluid/tests/unittests/test_dist_allreduce_op.py @@ -15,6 +15,9 @@ from __future__ import print_function import unittest from test_dist_base import TestDistBase +import paddle + +paddle.enable_static() class TestDistMnistNCCL2(TestDistBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py index 9df55a6b873..a82612b0ed2 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py @@ -19,6 +19,8 @@ import unittest import paddle import paddle.distributed.fleet.base.role_maker as role_maker +paddle.enable_static() + class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py index 5a5d8afc55b..5b7e0fb94c6 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto.py @@ -18,6 +18,8 @@ import os import paddle.distributed.fleet.base.role_maker as role_maker import time +paddle.enable_static() + class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py index 9085556c04c..3dff9d0f9d8 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py @@ -18,6 +18,8 @@ import os import paddle.distributed.fleet.base.role_maker as role_maker import time 
+paddle.enable_static() + class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py index 4787d048bd2..bdfa3a9a7d5 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_geo.py @@ -18,6 +18,8 @@ import os import paddle.distributed.fleet.base.role_maker as role_maker import time +paddle.enable_static() + class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py index 59ca41a11e3..db73069bf7d 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_geo.py @@ -18,6 +18,8 @@ import os import paddle.distributed.fleet.base.role_maker as role_maker import time +paddle.enable_static() + class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py index e0993e022e1..b05a53c88bb 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py @@ -19,6 +19,8 @@ import paddle.distributed.fleet as fleet import paddle.distributed.fleet.base.role_maker as role_maker import time +paddle.enable_static() + class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py 
b/python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py index 7d18e935f58..82a8f46a945 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py @@ -22,6 +22,9 @@ from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import f from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory from test_dist_fleet_base import TestFleetBase from dist_fleet_simnet_bow import train_network +import paddle + +paddle.enable_static() class TestDistGeoCtr_2x2(TestFleetBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py index 02a739c060c..b3e38a42128 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py @@ -18,6 +18,9 @@ import os import unittest import tempfile from test_dist_fleet_heter_base import TestFleetHeterBase +import paddle + +paddle.enable_static() class TestDistHeterDatasetAsync2x2(TestFleetHeterBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py index 33690396612..00301f9b1c6 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py @@ -21,6 +21,9 @@ import paddle.fluid as fluid import paddle.distributed.fleet.base.role_maker as role_maker from paddle.distributed.fleet.base.util_factory import fleet_util from paddle.distributed.fleet import fleet +import paddle + +paddle.enable_static() class TestDistFleetHeterProgram(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py index 8132add37a6..d766e6bf2af 100644 --- 
a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py @@ -19,6 +19,9 @@ import paddle.fluid as fluid import paddle.fluid.incubate.fleet.base.role_maker as role_maker from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory +import paddle + +paddle.enable_static() # For Net base_lr = 0.2 diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py index e7b10be2349..218eb77d0b5 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py @@ -24,6 +24,8 @@ import paddle.fluid as fluid import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet as fleet +paddle.enable_static() + # For Net base_lr = 0.2 emb_lr = base_lr * 3 diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py index de4363f255b..8d101a34b68 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py @@ -19,6 +19,9 @@ import paddle.fluid as fluid import paddle.fluid.incubate.fleet.base.role_maker as role_maker from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory +import paddle + +paddle.enable_static() # For Net base_lr = 0.2 diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py index dc40b2eb5c6..379bcaf684d 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py +++ 
b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py @@ -19,6 +19,9 @@ import paddle.fluid as fluid import paddle.fluid.incubate.fleet.base.role_maker as role_maker from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory +import paddle + +paddle.enable_static() # For Net base_lr = 0.2 diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py index 5e525bdb54d..fd069793473 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py @@ -19,6 +19,9 @@ import paddle.fluid as fluid import paddle.fluid.incubate.fleet.base.role_maker as role_maker from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory +import paddle + +paddle.enable_static() # For Net base_lr = 0.2 diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_simnet.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_simnet.py index ec34993905e..e0fa590db2a 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_simnet.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_simnet.py @@ -18,6 +18,9 @@ import os import unittest import tempfile from test_dist_fleet_base import TestFleetBase +import paddle + +paddle.enable_static() class TestDistSimnetASync2x2(TestFleetBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_backward_deps.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_backward_deps.py index 1f6274ec164..23a2b8fd306 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_backward_deps.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_backward_deps.py @@ -15,6 +15,9 @@ from __future__ 
import print_function import unittest from test_dist_base import TestDistBase +import paddle + +paddle.enable_static() class TestDistMnistNCCL2BackWardDeps(TestDistBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_batch_merge.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_batch_merge.py index 24c9b9a1397..4cf2cf5f367 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_batch_merge.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_batch_merge.py @@ -16,6 +16,9 @@ from __future__ import print_function import unittest from test_dist_base import TestDistBase import os +import paddle + +paddle.enable_static() flag_name = os.path.splitext(__file__)[0] diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py index 0b9b85d5d52..9bc48ac0a1b 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_dgc_nccl.py @@ -18,6 +18,9 @@ from test_dist_base import TestDistBase import os import subprocess +import paddle + +paddle.enable_static() flag_name = os.path.splitext(__file__)[0] diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_fleet_save.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_fleet_save.py index 7dac1153562..7336794578e 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_fleet_save.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_fleet_save.py @@ -17,6 +17,9 @@ import shutil import os import unittest from test_dist_base import TestDistBase +import paddle + +paddle.enable_static() class TestDistMnistFleetSave(TestDistBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py index d5ebe09adca..255fd9b2855 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py +++ 
b/python/paddle/fluid/tests/unittests/test_dist_mnist_fleetapi.py @@ -15,6 +15,9 @@ from __future__ import print_function import unittest from test_dist_base import TestDistBase +import paddle + +paddle.enable_static() class TestDistMnistNCCL2FleetApi(TestDistBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_hallreduce.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_hallreduce.py index cc002582371..356c5573f95 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_hallreduce.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_hallreduce.py @@ -17,6 +17,9 @@ import unittest from test_dist_base import TestDistBase import os +import paddle + +paddle.enable_static() flag_name = os.path.splitext(__file__)[0] diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_multi_comm.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_multi_comm.py index f43ccc8becb..d9e6be8609d 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_multi_comm.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_multi_comm.py @@ -17,6 +17,9 @@ import unittest from test_dist_base import TestDistBase import os +import paddle + +paddle.enable_static() flag_name = os.path.splitext(__file__)[0] diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_pg.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_pg.py index d063f8473e0..28ef31875db 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_pg.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_pg.py @@ -15,6 +15,9 @@ from __future__ import print_function import unittest from test_dist_base import TestDistBase +import paddle + +paddle.enable_static() class TestDistMnistNCCL2(TestDistBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_ring_allreduce.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_ring_allreduce.py index fd15020275b..4436064dc28 100644 --- 
a/python/paddle/fluid/tests/unittests/test_dist_mnist_ring_allreduce.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_ring_allreduce.py @@ -15,6 +15,9 @@ from __future__ import print_function import unittest from test_dist_base import TestDistBase +import paddle + +paddle.enable_static() class TestDistMnistNCCL2(TestDistBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_with_program.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_with_program.py index 4f4941aa217..d55582fbb4d 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_mnist_with_program.py +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_with_program.py @@ -15,6 +15,9 @@ from __future__ import print_function import unittest from test_dist_base import TestDistBase +import paddle + +paddle.enable_static() class TestDistMnistLocalSGDFleetApi(TestDistBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_op.py b/python/paddle/fluid/tests/unittests/test_dist_op.py index 1f46e0e7f9c..0f71027d274 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_op.py +++ b/python/paddle/fluid/tests/unittests/test_dist_op.py @@ -19,6 +19,8 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core +paddle.enable_static() + def dist(x, y, p): if p == 0.: diff --git a/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py b/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py index dbf0319d305..64217135be7 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py +++ b/python/paddle/fluid/tests/unittests/test_dist_se_resnext_nccl.py @@ -18,6 +18,9 @@ from test_dist_base import TestDistBase import os import os +import paddle + +paddle.enable_static() flag_name = os.path.splitext(__file__)[0] diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler_async_decay.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler_async_decay.py index 761d57408b9..dd5c393f49c 100644 --- 
a/python/paddle/fluid/tests/unittests/test_dist_transpiler_async_decay.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler_async_decay.py @@ -17,6 +17,9 @@ from __future__ import print_function import unittest import gc import paddle.fluid as fluid +import paddle + +paddle.enable_static() class TranspilerAsyncLRDecayTest(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler_config.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler_config.py index c8d0d840872..e6bc99fc225 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler_config.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler_config.py @@ -15,6 +15,9 @@ import unittest import paddle.fluid as fluid import gc +import paddle + +paddle.enable_static() gc.set_debug(gc.DEBUG_COLLECTABLE) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py index 927c155ff11..f06f1eaefae 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py @@ -17,6 +17,8 @@ import paddle import os from launch_function_helper import launch_func, wait, _find_free_port +paddle.enable_static() + class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): def setUp(self): diff --git a/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py b/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py index 6da37fe4d29..6751c887061 100644 --- a/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py +++ b/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py @@ -28,6 +28,8 @@ import unittest from multiprocessing import Process from op_test import OpTest +paddle.enable_static() + def run_pserver(use_cuda, sync_mode, ip, port, trainers, trainer_id): remove_ps_flag(os.getpid()) diff --git 
a/python/paddle/fluid/tests/unittests/test_nan_inf.py b/python/paddle/fluid/tests/unittests/test_nan_inf.py index d4a971d25bc..dc9ea5d957a 100644 --- a/python/paddle/fluid/tests/unittests/test_nan_inf.py +++ b/python/paddle/fluid/tests/unittests/test_nan_inf.py @@ -19,6 +19,9 @@ import unittest import os import sys import subprocess +import paddle + +paddle.enable_static() class TestNanInf(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_reducescatter.py b/python/paddle/fluid/tests/unittests/test_reducescatter.py index 58bcc11cd89..7c355d46285 100644 --- a/python/paddle/fluid/tests/unittests/test_reducescatter.py +++ b/python/paddle/fluid/tests/unittests/test_reducescatter.py @@ -15,9 +15,12 @@ from __future__ import print_function import unittest import numpy as np +import paddle from test_collective_base import TestDistBase +paddle.enable_static() + class TestReduceScatterOp(TestDistBase): def _setup_config(self): diff --git a/python/paddle/fluid/tests/unittests/test_reducescatter_api.py b/python/paddle/fluid/tests/unittests/test_reducescatter_api.py index 5fa75cc3eff..5a494b5529e 100644 --- a/python/paddle/fluid/tests/unittests/test_reducescatter_api.py +++ b/python/paddle/fluid/tests/unittests/test_reducescatter_api.py @@ -16,9 +16,12 @@ from __future__ import print_function import unittest import numpy as np import paddle.fluid as fluid +import paddle from test_collective_base import TestDistBase +paddle.enable_static() + class TestReduceScatterAPI(TestDistBase): def _setup_config(self): diff --git a/python/paddle/tests/test_text.py b/python/paddle/tests/test_text.py index 43968896c18..fa83b0cc6f3 100644 --- a/python/paddle/tests/test_text.py +++ b/python/paddle/tests/test_text.py @@ -28,6 +28,8 @@ from paddle import Model, set_device from paddle.static import InputSpec as Input from paddle.text import * +paddle.enable_static() + class ModuleApiTest(unittest.TestCase): @classmethod diff --git a/tools/test_runner.py 
b/tools/test_runner.py index 9b9f165e736..bad98f9b5c3 100644 --- a/tools/test_runner.py +++ b/tools/test_runner.py @@ -17,12 +17,14 @@ from __future__ import print_function import unittest import os import sys +import paddle import paddle.fluid as fluid import importlib from six.moves import cStringIO def main(): + paddle.enable_static() sys.path.append(os.getcwd()) some_test_failed = False for module_name in sys.argv[1:]: @@ -44,6 +46,7 @@ def main(): 'failed\n', buffer.getvalue(), file=sys.stderr) + paddle.disable_static() if some_test_failed: exit(1) -- GitLab From a04524759ef8d316b2ca5ceb903e829e6a55203a Mon Sep 17 00:00:00 2001 From: 123malin Date: Tue, 22 Sep 2020 21:18:44 +0800 Subject: [PATCH 041/117] Enhance Op's Error Message (#27455) * test=develop, update error message --- paddle/fluid/operators/concat_op.cc | 4 +- .../optimizers/decayed_adagrad_op.cc | 78 ++++++++++--------- .../operators/optimizers/decayed_adagrad_op.h | 22 +++--- .../operators/optimizers/lars_momentum_op.h | 7 +- 4 files changed, 64 insertions(+), 47 deletions(-) diff --git a/paddle/fluid/operators/concat_op.cc b/paddle/fluid/operators/concat_op.cc index 4f337c03599..7937e432d22 100644 --- a/paddle/fluid/operators/concat_op.cc +++ b/paddle/fluid/operators/concat_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/concat_op.h" + #include #include #include @@ -78,7 +79,8 @@ class ConcatOp : public framework::OperatorWithKernel { } } if (flag == 0) { - PADDLE_THROW("All Inputs of Concat OP are Empty!"); + PADDLE_THROW(platform::errors::InvalidArgument( + "All Inputs of Concat OP are Empty!")); } #ifdef PADDLE_WITH_MKLDNN if (platform::CanMKLDNNBeUsed(ctx)) { diff --git a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc index 5c6c38da928..eb41d21e092 100644 --- a/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc +++ b/paddle/fluid/operators/optimizers/decayed_adagrad_op.cc @@ -23,46 +23,54 @@ class DecayedAdagradOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Param"), - "Input(Param) of DecayedAdagradOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Grad"), - "Input(Grad) of DecayedAdagradOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Moment"), - "Input(Moment) of DecayedAdagradOp should not be null."); - PADDLE_ENFORCE( - ctx->HasInput("LearningRate"), - "Input(LearningRate) of DecayedAdagradOp should not be null."); - PADDLE_ENFORCE( - ctx->GetInputsVarType("Param").front() == - framework::proto::VarType::LOD_TENSOR, - "The input var's type should be LoDTensor, but the received is %s", - ctx->Inputs("Param").front(), ctx->GetInputsVarType("Param").front()); - PADDLE_ENFORCE( - ctx->GetInputsVarType("Grad").front() == - framework::proto::VarType::LOD_TENSOR, - "The input var's type should be LoDTensor, but the received is %s", - ctx->Inputs("Grad").front(), ctx->GetInputsVarType("Grad").front()); - - PADDLE_ENFORCE(ctx->HasOutput("ParamOut"), - "Output(ParamOut) of DecayedAdagradOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("MomentOut"), - "Output(MomentOut) of DecayedAdagradOp should not be 
null."); + OP_INOUT_CHECK(ctx->HasInput("Param"), "Input", "Param", + "DecayedAdagradOp"); + OP_INOUT_CHECK(ctx->HasInput("Grad"), "Input", "Grad", "DecayedAdagradOp"); + OP_INOUT_CHECK(ctx->HasInput("Moment"), "Input", "Moment", + "DecayedAdagradOp"); + OP_INOUT_CHECK(ctx->HasInput("LearningRate"), "Input", "LearningRate", + "DecayedAdagradOp"); + PADDLE_ENFORCE_EQ( + ctx->GetInputsVarType("Param").front(), + framework::proto::VarType::LOD_TENSOR, + platform::errors::InvalidArgument( + "The input var's type should be LoDTensor, but the received is %s", + ctx->Inputs("Param").front(), + ctx->GetInputsVarType("Param").front())); + PADDLE_ENFORCE_EQ( + ctx->GetInputsVarType("Grad").front(), + framework::proto::VarType::LOD_TENSOR, + platform::errors::InvalidArgument( + "The input var's type should be LoDTensor, but the received is %s", + ctx->Inputs("Grad").front(), + ctx->GetInputsVarType("Grad").front())); + + OP_INOUT_CHECK(ctx->HasOutput("ParamOut"), "Output", "ParamOut", + "DecayedAdagradOp"); + OP_INOUT_CHECK(ctx->HasOutput("MomentOut"), "Output", "MomentOut", + "DecayedAdagradOp"); auto lr_dims = ctx->GetInputDim("LearningRate"); PADDLE_ENFORCE_NE(framework::product(lr_dims), 0, - "Maybe the Input variable LearningRate has not " - "been initialized. You may need to confirm " - "if you put exe.run(startup_program) " - "after optimizer.minimize function."); + platform::errors::InvalidArgument( + "Maybe the Input variable LearningRate has not " + "been initialized. 
You may need to confirm " + "if you put exe.run(startup_program) " + "after optimizer.minimize function.")); PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, - "LearningRate should have one element"); + platform::errors::InvalidArgument( + "LearningRate should have one element")); auto param_dims = ctx->GetInputDim("Param"); - PADDLE_ENFORCE_EQ(param_dims, ctx->GetInputDim("Grad"), - "Param and Grad input of DecayedAdagradOp should have " - "the same dimension."); - PADDLE_ENFORCE_EQ(param_dims, ctx->GetInputDim("Moment"), - "Param and Moment input of DecayedAdagradOp should have " - "the same dimension."); + PADDLE_ENFORCE_EQ( + param_dims, ctx->GetInputDim("Grad"), + platform::errors::InvalidArgument( + "Param and Grad input of DecayedAdagradOp should have " + "the same dimension.")); + PADDLE_ENFORCE_EQ( + param_dims, ctx->GetInputDim("Moment"), + platform::errors::InvalidArgument( + "Param and Moment input of DecayedAdagradOp should have " + "the same dimension.")); ctx->SetOutputDim("ParamOut", param_dims); ctx->SetOutputDim("MomentOut", param_dims); diff --git a/paddle/fluid/operators/optimizers/decayed_adagrad_op.h b/paddle/fluid/operators/optimizers/decayed_adagrad_op.h index 279edfb015c..f264ebf8a32 100644 --- a/paddle/fluid/operators/optimizers/decayed_adagrad_op.h +++ b/paddle/fluid/operators/optimizers/decayed_adagrad_op.h @@ -24,17 +24,19 @@ class DecayedAdagradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { const auto* param_var = ctx.InputVar("Param"); - PADDLE_ENFORCE(param_var->IsType(), - "The Var(%s)'s type should be LoDTensor, " - "but the received is %s", - ctx.InputNames("Param").front(), - framework::ToTypeName(param_var->Type())); + PADDLE_ENFORCE_EQ(param_var->IsType(), true, + platform::errors::InvalidArgument( + "The Var(%s)'s type should be LoDTensor, " + "but the received is %s", + ctx.InputNames("Param").front(), + framework::ToTypeName(param_var->Type()))); const 
auto* grad_var = ctx.InputVar("Grad"); - PADDLE_ENFORCE(grad_var->IsType(), - "The Var(%s)'s type should be LoDTensor, " - "but the received is %s", - ctx.InputNames("Grad").front(), - framework::ToTypeName(grad_var->Type())); + PADDLE_ENFORCE_EQ(grad_var->IsType(), true, + platform::errors::InvalidArgument( + "The Var(%s)'s type should be LoDTensor, " + "but the received is %s", + ctx.InputNames("Grad").front(), + framework::ToTypeName(grad_var->Type()))); auto param_out_tensor = ctx.Output("ParamOut"); auto moment_out_tensor = ctx.Output("MomentOut"); diff --git a/paddle/fluid/operators/optimizers/lars_momentum_op.h b/paddle/fluid/operators/optimizers/lars_momentum_op.h index b579b5143dd..55775bc08fb 100755 --- a/paddle/fluid/operators/optimizers/lars_momentum_op.h +++ b/paddle/fluid/operators/optimizers/lars_momentum_op.h @@ -30,7 +30,12 @@ class LarsMomentumOpKernel : public framework::OpKernel { auto learning_rate = ctx.Input("LearningRate"); auto* grad_var = ctx.InputVar("Grad"); // only support dense for now. 
- PADDLE_ENFORCE_EQ(grad_var->IsType(), true); + PADDLE_ENFORCE_EQ(grad_var->IsType(), true, + platform::errors::InvalidArgument( + "The Var(%s)'s type should be LoDTensor, " + "but the received is %s", + ctx.InputNames("Grad").front(), + framework::ToTypeName(grad_var->Type()))); auto grad = ctx.Input("Grad"); param_out->mutable_data(ctx.GetPlace()); -- GitLab From 76fb95fe769f991685818059324664da3d1d1af4 Mon Sep 17 00:00:00 2001 From: wangchaochaohu Date: Tue, 22 Sep 2020 09:06:10 -0700 Subject: [PATCH 042/117] avoid data transform for linspace OP (#27444) --- paddle/fluid/operators/linspace_op.cc | 11 +++++++++-- python/paddle/fluid/layers/tensor.py | 9 ++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/operators/linspace_op.cc b/paddle/fluid/operators/linspace_op.cc index 667c6e89295..7cc07383bfa 100644 --- a/paddle/fluid/operators/linspace_op.cc +++ b/paddle/fluid/operators/linspace_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/linspace_op.h" +#include namespace paddle { namespace operators { @@ -21,7 +22,7 @@ class LinspaceOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { + void InferShape(framework::InferShapeContext *ctx) const override { OP_INOUT_CHECK(ctx->HasInput("Start"), "Input", "Start", "linspace"); OP_INOUT_CHECK(ctx->HasInput("Stop"), "Input", "Stop", "linspace"); OP_INOUT_CHECK(ctx->HasInput("Num"), "Input", "Num", "linspace"); @@ -50,11 +51,17 @@ class LinspaceOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext& ctx) const override { + const framework::ExecutionContext &ctx) const override { return framework::OpKernelType( framework::proto::VarType::Type(ctx.Attr("dtype")), ctx.GetPlace()); } + + framework::OpKernelType GetKernelTypeForVar( + const std::string &var_name, const framework::Tensor &tensor, + const framework::OpKernelType &expected_kernel_type) const override { + return expected_kernel_type; + } }; class LinspaceOpMaker : public framework::OpProtoAndCheckerMaker { diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index cf52f3b00fb..2fba578ec07 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -1453,11 +1453,14 @@ def linspace(start, stop, num, dtype=None, name=None): if not isinstance(dtype, core.VarDesc.VarType): dtype = convert_np_dtype_to_dtype_(dtype) if not isinstance(start, Variable): - tensor_start = fill_constant([1], dtype, start) + with device_guard("cpu"): + tensor_start = fill_constant([1], dtype, start) if not isinstance(stop, Variable): - tensor_stop = fill_constant([1], dtype, stop) + with device_guard("cpu"): + tensor_stop = fill_constant([1], dtype, stop) if not isinstance(num, Variable): - tensor_num = 
fill_constant([1], 'int32', num) + with device_guard("cpu"): + tensor_num = fill_constant([1], 'int32', num) if in_dygraph_mode(): return core.ops.linspace(tensor_start, tensor_stop, tensor_num, 'dtype', dtype) -- GitLab From 0721767ba90536ee205ca04ac35dd0c124a797c8 Mon Sep 17 00:00:00 2001 From: danleifeng <52735331+danleifeng@users.noreply.github.com> Date: Wed, 23 Sep 2020 10:18:32 +0800 Subject: [PATCH 043/117] fix server_num bug;test=develop (#27442) --- python/paddle/distributed/fleet/base/role_maker.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/paddle/distributed/fleet/base/role_maker.py b/python/paddle/distributed/fleet/base/role_maker.py index 81d5908ccd4..f66f013e4db 100644 --- a/python/paddle/distributed/fleet/base/role_maker.py +++ b/python/paddle/distributed/fleet/base/role_maker.py @@ -605,7 +605,8 @@ class PaddleCloudRoleMaker(RoleMakerBase): """ if not self._role_is_generated: self._generate_role() - return len(self._get_pserver_endpoints()) + return len(self._get_pserver_endpoints( + )) if self._get_pserver_endpoints() is not None else 0 def _node_num(self): """ -- GitLab From 765064476b26601d1aba653823a05b21a423ef25 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Wed, 23 Sep 2020 10:23:10 +0800 Subject: [PATCH 044/117] Polish some lost invalid error message (#27445) * polish some lost error msg * add some math file to white list * polish detail based reviewer commnet --- paddle/fluid/inference/capi/pd_predictor.cc | 4 ++- paddle/fluid/inference/io.cc | 25 +++++++++++-------- .../tensorrt/plugin/trt_plugin_factory.cc | 4 +-- .../tensorrt/plugin/trt_plugin_utils.h | 11 ++++---- paddle/fluid/inference/utils/singleton.h | 4 ++- paddle/fluid/platform/cuda_profiler.h | 6 ++++- paddle/fluid/train/demo/demo_trainer.cc | 7 ++++-- paddle/fluid/train/imdb_demo/demo_trainer.cc | 14 ++++++++--- .../train/test_train_recognize_digits.cc | 3 ++- tools/enforce/count_enforce_by_file.sh | 9 ++++++- 10 files changed, 59 
insertions(+), 28 deletions(-) diff --git a/paddle/fluid/inference/capi/pd_predictor.cc b/paddle/fluid/inference/capi/pd_predictor.cc index 31915496893..c1bf4c974fa 100644 --- a/paddle/fluid/inference/capi/pd_predictor.cc +++ b/paddle/fluid/inference/capi/pd_predictor.cc @@ -131,7 +131,9 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config, PADDLE_ENFORCE_EQ( input_names.size(), in_size, paddle::platform::errors::InvalidArgument( - "The number of input and the number of model's input must match.")); + "The number of input and the number of model's input must match. The " + "number of input is %d, the number of model's input is %d.", + input_names.size(), in_size)); for (int i = 0; i < in_size; ++i) { auto input_t = predictor->GetInputTensor(inputs[i].name); std::vector tensor_shape; diff --git a/paddle/fluid/inference/io.cc b/paddle/fluid/inference/io.cc index c497ab384b5..84e011c6505 100644 --- a/paddle/fluid/inference/io.cc +++ b/paddle/fluid/inference/io.cc @@ -47,7 +47,9 @@ void Init(const std::vector argv) { void ReadBinaryFile(const std::string& filename, std::string* contents) { std::ifstream fin(filename, std::ios::in | std::ios::binary); - PADDLE_ENFORCE(static_cast(fin), "Cannot open file %s", filename); + PADDLE_ENFORCE_EQ( + fin.is_open(), true, + platform::errors::Unavailable("Failed to open file %s.", filename)); fin.seekg(0, std::ios::end); contents->clear(); contents->resize(fin.tellg()); @@ -133,9 +135,10 @@ std::unique_ptr Load(framework::Executor* executor, std::unique_ptr main_program( new framework::ProgramDesc(program_desc_str)); - PADDLE_ENFORCE(framework::IsProgramVersionSupported(main_program->Version()), - "model version %ld is not supported.", - main_program->Version()); + PADDLE_ENFORCE_EQ( + framework::IsProgramVersionSupported(main_program->Version()), true, + platform::errors::Unavailable("Model version %ld is not supported.", + main_program->Version())); // model_from_memory is false in separate parameters. 
LoadPersistables(executor, scope, *main_program, dirname, "", @@ -151,9 +154,10 @@ std::unique_ptr Load( std::unique_ptr main_program( new framework::ProgramDesc(program_desc_str)); - PADDLE_ENFORCE(framework::IsProgramVersionSupported(main_program->Version()), - "model version %ld is not supported.", - main_program->Version()); + PADDLE_ENFORCE_EQ( + framework::IsProgramVersionSupported(main_program->Version()), true, + platform::errors::Unavailable("Model version %ld is not supported.", + main_program->Version())); LoadPersistables(executor, scope, *main_program, "", param_filename, false /* model_from_memory */); @@ -165,9 +169,10 @@ std::unique_ptr LoadFromMemory( const std::string& prog_buffer, const std::string& param_buffer) { std::unique_ptr main_program( new framework::ProgramDesc(prog_buffer)); - PADDLE_ENFORCE(framework::IsProgramVersionSupported(main_program->Version()), - "model version %ld is not supported.", - main_program->Version()); + PADDLE_ENFORCE_EQ( + framework::IsProgramVersionSupported(main_program->Version()), true, + platform::errors::Unavailable("Model version %ld is not supported.", + main_program->Version())); LoadPersistables(executor, scope, *main_program, "", param_buffer, true /* model_filename */); diff --git a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.cc b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.cc index 76b0832c546..0bf8a1691e2 100644 --- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.cc +++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.cc @@ -27,8 +27,8 @@ PluginTensorRT* PluginFactoryTensorRT::createPlugin(const char* layer_name, PADDLE_ENFORCE_EQ( Has(plugin_type), true, - platform::errors::NotFound( - "trt plugin type %s does not exists, check it.", plugin_type)); + platform::errors::NotFound("TensorRT plugin type `%s` does not exists.", + plugin_type)); auto plugin = plugin_registry_[plugin_type](serial_data, serial_length); owned_plugins_.emplace_back(plugin); 
diff --git a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h index 6fcb70c6d32..16751c764bd 100644 --- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h +++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h @@ -103,12 +103,11 @@ struct Serializer, DeserializeValue(buffer, buffer_size, &size); value->resize(size); size_t nbyte = value->size() * sizeof(T); - PADDLE_ENFORCE_GE( - *buffer_size, nbyte, - platform::errors::InvalidArgument("Expect buffer size >= value size in " - "trt plugin deserialization, but got " - "buffer size = %d, value size = %d.", - *buffer_size, nbyte)); + PADDLE_ENFORCE_GE(*buffer_size, nbyte, + platform::errors::InvalidArgument( + "Insufficient data in buffer, expect contains %d " + "byte, but actually only contains %d byte.", + *buffer_size, nbyte)); std::memcpy(value->data(), *buffer, nbyte); reinterpret_cast(*buffer) += nbyte; *buffer_size -= nbyte; diff --git a/paddle/fluid/inference/utils/singleton.h b/paddle/fluid/inference/utils/singleton.h index 990bef35949..6828924c300 100644 --- a/paddle/fluid/inference/utils/singleton.h +++ b/paddle/fluid/inference/utils/singleton.h @@ -46,7 +46,9 @@ struct Registry { template void Register(const std::string& name) { - PADDLE_ENFORCE_EQ(items_.count(name), 0); + PADDLE_ENFORCE_EQ(items_.count(name), 0, + platform::errors::AlreadyExists( + "Item `%s` has beed registered.", name)); items_[name] = new ItemChild; } diff --git a/paddle/fluid/platform/cuda_profiler.h b/paddle/fluid/platform/cuda_profiler.h index 957bdf1e698..a9382f2c8ad 100644 --- a/paddle/fluid/platform/cuda_profiler.h +++ b/paddle/fluid/platform/cuda_profiler.h @@ -24,7 +24,11 @@ namespace platform { void CudaProfilerInit(std::string output_file, std::string output_mode, std::string config_file) { - PADDLE_ENFORCE(output_mode == "kvp" || output_mode == "csv"); + PADDLE_ENFORCE(output_mode == "kvp" || output_mode == "csv", + 
platform::errors::InvalidArgument( + "Unsupported cuda profiler output mode, expect `kvp` or " + "`csv`, but received `%s`.", + output_mode)); cudaOutputMode_t mode = output_mode == "csv" ? cudaCSV : cudaKeyValuePair; PADDLE_ENFORCE_CUDA_SUCCESS( cudaProfilerInitialize(config_file.c_str(), output_file.c_str(), mode)); diff --git a/paddle/fluid/train/demo/demo_trainer.cc b/paddle/fluid/train/demo/demo_trainer.cc index 1087f567245..1ef98720f83 100644 --- a/paddle/fluid/train/demo/demo_trainer.cc +++ b/paddle/fluid/train/demo/demo_trainer.cc @@ -29,7 +29,9 @@ namespace train { void ReadBinaryFile(const std::string& filename, std::string* contents) { std::ifstream fin(filename, std::ios::in | std::ios::binary); - PADDLE_ENFORCE(static_cast(fin), "Cannot open file %s", filename); + PADDLE_ENFORCE_EQ( + fin.is_open(), true, + platform::errors::Unavailable("Failed to open file %s.", filename)); fin.seekg(0, std::ios::end); contents->clear(); contents->resize(fin.tellg()); @@ -70,7 +72,8 @@ int main() { } } - PADDLE_ENFORCE_NE(loss_name, "", "loss not found"); + PADDLE_ENFORCE_NE(loss_name, "", + platform::errors::NotFound("Loss name is not found.")); // init all parameters executor.Run(*startup_program, &scope, 0); diff --git a/paddle/fluid/train/imdb_demo/demo_trainer.cc b/paddle/fluid/train/imdb_demo/demo_trainer.cc index d45edd563f0..a08069a57ca 100644 --- a/paddle/fluid/train/imdb_demo/demo_trainer.cc +++ b/paddle/fluid/train/imdb_demo/demo_trainer.cc @@ -45,7 +45,9 @@ namespace train { void ReadBinaryFile(const std::string& filename, std::string* contents) { std::ifstream fin(filename, std::ios::in | std::ios::binary); - PADDLE_ENFORCE(static_cast(fin), "Cannot open file %s", filename); + PADDLE_ENFORCE_EQ( + fin.is_open(), true, + platform::errors::Unavailable("Failed to open file %s.", filename)); fin.seekg(0, std::ios::end); contents->clear(); contents->resize(fin.tellg()); @@ -98,7 +100,11 @@ int main(int argc, char* argv[]) { file_vec.push_back(filename); } } - 
PADDLE_ENFORCE_GE(file_vec.size(), 1, "At least one file to train"); + PADDLE_ENFORCE_GE( + file_vec.size(), 1, + platform::errors::InvalidArgument( + "At least one file to train, but received number of file is %d.", + file_vec.size())); paddle::framework::InitDevices(false); const auto cpu_place = paddle::platform::CPUPlace(); paddle::framework::Executor executor(cpu_place); @@ -148,7 +154,9 @@ int main(int argc, char* argv[]) { const std::vector readers = dataset_ptr->GetReaders(); PADDLE_ENFORCE_EQ(readers.size(), 1, - "readers num should be equal to thread num"); + platform::errors::InvalidArgument( + "Readers num(%d) should be equal to thread num(1).", + readers.size())); readers[0]->SetPlace(paddle::platform::CPUPlace()); const std::vector& input_feed_names = readers[0]->GetUseSlotAlias(); diff --git a/paddle/fluid/train/test_train_recognize_digits.cc b/paddle/fluid/train/test_train_recognize_digits.cc index 45c438e8925..e7b698e1a34 100644 --- a/paddle/fluid/train/test_train_recognize_digits.cc +++ b/paddle/fluid/train/test_train_recognize_digits.cc @@ -51,7 +51,8 @@ void Train() { } } - PADDLE_ENFORCE_NE(loss_name, "", "loss not found"); + PADDLE_ENFORCE_NE(loss_name, "", + platform::errors::NotFound("Loss name is not found.")); // prepare data auto x_var = scope.Var("img"); diff --git a/tools/enforce/count_enforce_by_file.sh b/tools/enforce/count_enforce_by_file.sh index 1858bd0fd17..c1e2903c092 100644 --- a/tools/enforce/count_enforce_by_file.sh +++ b/tools/enforce/count_enforce_by_file.sh @@ -57,7 +57,14 @@ FILE_WHITE_LIST="\ random_crop_op.h \ elementwise_op_function.cu.h \ fused_elemwise_activation_op.cc \ - auc_op.cu" + auc_op.cu \ + unsqueeze_op.h \ + unsqueeze_op.cc \ + enforce.h \ + errors_test.cc \ + cross_entropy.cu \ + cross_entropy.h \ + unpooling.cu" function count_file_recursively(){ dir_name=$1 -- GitLab From 5034d181f382eb3bd6d4676bc587002fd158f77b Mon Sep 17 00:00:00 2001 From: Wilber Date: Wed, 23 Sep 2020 10:31:49 +0800 Subject: [PATCH 
045/117] update for 2.0 inference api. (#27473) --- paddle/fluid/inference/api/analysis_predictor.cc | 1 + paddle/fluid/inference/api/api_impl.cc | 1 + python/paddle/inference/__init__.py | 16 ++++++++++++++++ python/setup.py.in | 1 + 4 files changed, 19 insertions(+) create mode 100644 python/paddle/inference/__init__.py diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index ac914700643..42e62011f84 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1048,6 +1048,7 @@ void AnalysisPredictor::SaveOptimModel(const std::string &dir) { template <> std::unique_ptr CreatePaddlePredictor( const AnalysisConfig &config) { + LOG(WARNING) << "Deprecated. Please use CreatePredictor instead."; return CreatePaddlePredictor( config); } diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index ca0a5148f06..c78cdf24dec 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -373,6 +373,7 @@ std::unique_ptr CreatePaddlePredictor< template <> std::unique_ptr CreatePaddlePredictor( const NativeConfig &config) { + LOG(WARNING) << "Deprecated. Please use CreatePredictor instead."; return CreatePaddlePredictor(config); } diff --git a/python/paddle/inference/__init__.py b/python/paddle/inference/__init__.py new file mode 100644 index 00000000000..c388301ec34 --- /dev/null +++ b/python/paddle/inference/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ..fluid.inference import Config, DataType, PlaceType, PrecisionType, Tensor, \ + Predictor, create_predictor, get_version, get_num_bytes_of_data_type, PredictorPool diff --git a/python/setup.py.in b/python/setup.py.in index d85a23a5edd..467c5cb8677 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -155,6 +155,7 @@ packages=['paddle', 'paddle.distributed.fleet.utils', 'paddle.framework', 'paddle.jit', + 'paddle.inference', 'paddle.fluid', 'paddle.fluid.inference', 'paddle.fluid.dygraph', -- GitLab From 906e7f921e6df8d6376902aa581e9c2f03fdc0dc Mon Sep 17 00:00:00 2001 From: Zhang Ting Date: Wed, 23 Sep 2020 10:34:02 +0800 Subject: [PATCH 046/117] add fuse_bn_act op (#27230) * add fused_bn_add_relu op --- cmake/operators.cmake | 3 +- paddle/fluid/operators/fused/CMakeLists.txt | 8 +- .../fused/fused_bn_add_activation_op.cc | 255 +++++++++++++ .../fused/fused_bn_add_activation_op.cu | 338 ++++++++++++++++++ .../fused/fused_bn_add_activation_op.h | 106 ++++++ python/paddle/fluid/contrib/layers/nn.py | 191 +++++++++- .../contrib/mixed_precision/fp16_lists.py | 1 + .../contrib/mixed_precision/fp16_utils.py | 9 +- .../tests/unittests/test_fused_bn_add_act.py | 215 +++++++++++ 9 files changed, 1120 insertions(+), 6 deletions(-) create mode 100644 paddle/fluid/operators/fused/fused_bn_add_activation_op.cc create mode 100644 paddle/fluid/operators/fused/fused_bn_add_activation_op.cu create mode 100644 paddle/fluid/operators/fused/fused_bn_add_activation_op.h create mode 100644 python/paddle/fluid/tests/unittests/test_fused_bn_add_act.py diff --git 
a/cmake/operators.cmake b/cmake/operators.cmake index aea972ab3db..21080fbe8fd 100644 --- a/cmake/operators.cmake +++ b/cmake/operators.cmake @@ -127,7 +127,8 @@ function(op_library TARGET) "tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op" "fusion_transpose_flatten_concat_op" "fusion_conv_inception_op" "sync_batch_norm_op" "dgc_op" "fused_fc_elementwise_layernorm_op" -"multihead_matmul_op" "fusion_group_op" "fused_bn_activation_op" "fused_embedding_eltwise_layernorm_op" "fusion_gru_op") +"multihead_matmul_op" "fusion_group_op" "fused_bn_activation_op" "fused_embedding_eltwise_layernorm_op" "fusion_gru_op" +"fused_bn_add_activation_op") if ("${TARGET}" STREQUAL "${manual_pybind_op}") set(pybind_flag 1) endif() diff --git a/paddle/fluid/operators/fused/CMakeLists.txt b/paddle/fluid/operators/fused/CMakeLists.txt index 3fc5f3bfc6b..477a9162fe3 100644 --- a/paddle/fluid/operators/fused/CMakeLists.txt +++ b/paddle/fluid/operators/fused/CMakeLists.txt @@ -8,7 +8,8 @@ register_operators(EXCLUDES multihead_matmul_op fused_embedding_eltwise_layernorm_op fusion_group_op - fusion_gru_op) + fusion_gru_op + fused_bn_add_activation_op) # fusion_gru_op does not have CUDA kernel op_library(fusion_gru_op) @@ -47,4 +48,9 @@ if (WITH_GPU) file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(fusion_group);\n") cc_test(test_fusion_group_op SRCS fusion_group_op_test.cc DEPS fusion_group_op) endif() + # fused_bn_add_activation + if (NOT ${CUDNN_VERSION} VERSION_LESS 7401) + op_library(fused_bn_add_activation_op) + file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(fused_bn_add_activation);\n") + endif() endif() diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc new file mode 100644 index 00000000000..5b3ed03bb64 --- /dev/null +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc @@ -0,0 +1,255 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/fused/fused_bn_add_activation_op.h" +#include +#include +#include +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using LoDTensor = framework::LoDTensor; + +void FusedBatchNormAddActOp::InferShape( + framework::InferShapeContext *ctx) const { + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasInput("Z"), "Input", "Z", "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasInput("Scale"), "Input", "Scale", + "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasInput("Bias"), "Input", "Bias", + "FusedBatchNormAddActOp"); + + // check output + OP_INOUT_CHECK(ctx->HasOutput("Y"), "Output", "Y", "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasOutput("MeanOut"), "Output", "MeanOut", + "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasOutput("VarianceOut"), "Output", "VarianceOut", + "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasOutput("SavedMean"), "Output", "SavedMean", + "FusedBatchNormAddActOp"); + OP_INOUT_CHECK(ctx->HasOutput("SavedVariance"), "Output", "SavedVariance", + "FusedBatchNormAddActOp"); + + const auto x_dims = ctx->GetInputDim("X"); + const auto z_dims = ctx->GetInputDim("Z"); + PADDLE_ENFORCE_EQ(x_dims, z_dims, + platform::errors::InvalidArgument( + "ShapeError: the shapes of input " + "must be equal. 
But received: the shape " + "of input X = [%s], and the shape of " + "input Y = [%s]", + x_dims, z_dims)); + PADDLE_ENFORCE_GE(x_dims.size(), 2, platform::errors::InvalidArgument( + "ShapeError: the dimensions of input " + "must greater than or equal to 2." + "But received: the shape of input " + "= [%s], the dimension of input = " + "[%d]", + x_dims, x_dims.size())); + PADDLE_ENFORCE_LE(x_dims.size(), 5, platform::errors::InvalidArgument( + "ShapeError: the dimensions of input " + "must smaller than or equal to 5." + "But received: the shape of input " + "= [%s], the dimension of input = " + "[%d]", + x_dims, x_dims.size())); + + const int64_t C = x_dims[x_dims.size() - 1]; + + auto scale_dim = ctx->GetInputDim("Scale"); + auto bias_dim = ctx->GetInputDim("Bias"); + + PADDLE_ENFORCE_EQ( + scale_dim.size(), 1UL, + platform::errors::InvalidArgument( + "ShapeError: the dimension of scale must equal to 1." + "But received: the shape of scale is [%s], the dimension " + "of scale is [%d]", + scale_dim, scale_dim.size())); + PADDLE_ENFORCE_EQ(bias_dim.size(), 1UL, + platform::errors::InvalidArgument( + "ShapeError: the dimension of bias must equal to 1." 
+ "But received: the shape of bias is [%s],the dimension " + "of bias is [%d]", + bias_dim, bias_dim.size())); + + bool check = true; + if ((!ctx->IsRuntime()) && (framework::product(scale_dim) <= 0 || + framework::product(bias_dim) <= 0)) { + check = false; + } + + if (check) { + PADDLE_ENFORCE_EQ(scale_dim[0], C, + platform::errors::InvalidArgument( + "ShapeError: the shape of scale must equal to [%d]" + "But received: the shape of scale is [%d]", + C, scale_dim[0])); + PADDLE_ENFORCE_EQ(bias_dim[0], C, + platform::errors::InvalidArgument( + "ShapeError: the shape of bias must equal to [%d]" + "But received: the shape of bias is [%d]", + C, bias_dim[0])); + } + ctx->SetOutputDim("Y", x_dims); + ctx->SetOutputDim("MeanOut", {C}); + ctx->SetOutputDim("VarianceOut", {C}); + ctx->SetOutputDim("SavedMean", {C}); + ctx->SetOutputDim("SavedVariance", {C}); + ctx->ShareLoD("X", "Y"); +} + +framework::OpKernelType FusedBatchNormAddActOp::GetExpectedKernelType( + const framework::ExecutionContext &ctx) const { + auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X"); + // By default, the type of the scale, bias, mean, + // and var tensors should be float when input tensor's dtype is float16. 
+ auto bn_param_type = framework::proto::VarType::FP32; + + PADDLE_ENFORCE_EQ( + bn_param_type, ctx.Input("Scale")->type(), + platform::errors::InvalidArgument("Scale input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, ctx.Input("Bias")->type(), + platform::errors::InvalidArgument("Bias input should be of float type")); + + framework::LibraryType library = framework::LibraryType::kPlain; + framework::DataLayout layout = framework::DataLayout::kAnyLayout; + + return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout, + library); +} + +void FusedBatchNormAddActOpMaker::Make() { + AddInput("X", "The input tensor"); + AddInput("Z", "The input tensor"); + AddInput("Scale", + "Scale is a 1-dimensional tensor of size C " + "that is applied to the output"); + AddInput("Bias", + "Bias is a 1-dimensional tensor of size C " + "that is applied to the output"); + AddOutput("Y", "result after normalization"); + AddOutput("MeanOut", + "Share memory with Mean. " + "Store the global mean when training"); + AddOutput("VarianceOut", + "Share memory with Variance. " + "Store the global Variance when training"); + AddOutput("SavedMean", + "Mean of the current mini batch, " + "will apply to output when training") + .AsIntermediate(); + AddOutput("SavedVariance", + "Variance of the current mini batch, " + "will apply to output when training") + .AsIntermediate(); + AddOutput("ReserveSpace", + "Reserve GPU space for triggering the new semi-persistent " + "NHWC kernel"); + AddAttr("momentum", "").SetDefault(0.9); + AddAttr("epsilon", "") + .SetDefault(1e-5) + .AddCustomChecker([](const float &epsilon) { + PADDLE_ENFORCE_EQ(epsilon >= 0.0f && epsilon <= 0.001f, true, + platform::errors::InvalidArgument( + "'epsilon' should be between 0.0 and 0.001.")); + }); + AddAttr("act_type", "The activation type to be fused.") + .SetDefault("relu"); + AddComment(R"DOC( +Fused Batch Normalization with activation. 
+ +Batch Norm has been implemented as discussed in the paper: +https://arxiv.org/pdf/1502.03167.pdf +Batch Norm can be used as a normalizer function for conv2d and fully_connected operations. +Now, the required data format for FusedBatchNormAddActOp is NHWC `[batch, in_height, in_width, in_channels]`. + +)DOC"); +} + +void FusedBatchNormAddActGradOp::InferShape( + framework::InferShapeContext *ctx) const { + // check input + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", + "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasInput("Z"), "Input", "Z", + "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasInput("Scale"), "Input", "Scale", + "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasInput("SavedMean"), "Input", "SavedMean", + "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasInput("SavedVariance"), "Input", "SavedVariance", + "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Y")), "Input", + framework::GradVarName("Y"), "FusedBatchNormAddActGradOp"); + + // check output + OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("X")), "Output", + framework::GradVarName("X"), "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Z")), "Output", + framework::GradVarName("Z"), "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Scale")), "Output", + framework::GradVarName("Scale"), "FusedBatchNormAddActGradOp"); + OP_INOUT_CHECK(ctx->HasOutput(framework::GradVarName("Bias")), "Output", + framework::GradVarName("Bias"), "FusedBatchNormAddActGradOp"); + + const auto in_dims = ctx->GetInputDim("X"); + const int C = in_dims[in_dims.size() - 1]; + + ctx->SetOutputDim(framework::GradVarName("X"), in_dims); + ctx->SetOutputDim(framework::GradVarName("Z"), in_dims); + ctx->SetOutputDim(framework::GradVarName("Scale"), {C}); + ctx->SetOutputDim(framework::GradVarName("Bias"), {C}); +} + +framework::OpKernelType 
FusedBatchNormAddActGradOp::GetExpectedKernelType( + const framework::ExecutionContext &ctx) const { + const auto *var = ctx.InputVar(framework::GradVarName("Y")); + if (var == nullptr) { + PADDLE_THROW(platform::errors::NotFound( + "Can not find Y@GRAD in the execution context.")); + } + const Tensor *t = nullptr; + if (var->IsType()) { + t = &var->Get(); + } else if (var->IsType()) { + t = &var->Get(); + } + if (t == nullptr) { + PADDLE_THROW( + platform::errors::NotFound("Can not get the tensor value of Y@GRAD.")); + } + + framework::LibraryType library = framework::LibraryType::kPlain; + framework::DataLayout layout = framework::DataLayout::kAnyLayout; + + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace(), layout, + library); +} + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR( + fused_bn_add_activation, ops::FusedBatchNormAddActOp, + ops::FusedBatchNormAddActOpMaker, ops::FusedBatchNormAddActOpInferVarType, + ops::FusedBatchNormAddActGradOpMaker, + ops::FusedBatchNormAddActGradOpMaker); +REGISTER_OPERATOR(fused_bn_add_activation_grad, + ops::FusedBatchNormAddActGradOp); diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu new file mode 100644 index 00000000000..7f1d297cda3 --- /dev/null +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu @@ -0,0 +1,338 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include "paddle/fluid/framework/data_layout.h" +#include "paddle/fluid/operators/activation_op.h" +#include "paddle/fluid/operators/fused/fused_bn_add_activation_op.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/operators/norm_utils.h" +#include "paddle/fluid/platform/cudnn_helper.h" +#include "paddle/fluid/platform/float16.h" + +DECLARE_bool(cudnn_batchnorm_spatial_persistent); + +namespace paddle { +namespace operators { +using Tensor = framework::Tensor; +template +using CudnnDataType = platform::CudnnDataType; +template +using BatchNormParamType = typename CudnnDataType::BatchNormParamType; + +template +class FusedBatchNormAddActKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), true, + platform::errors::PreconditionNotMet("It must use CUDAPlace.")); + double epsilon = static_cast(ctx.Attr("epsilon")); + float momentum = ctx.Attr("momentum"); + std::string act_type = ctx.Attr("act_type"); + + if (epsilon <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) { + LOG(ERROR) << "Provided epsilon is smaller than " + << "CUDNN_BN_MIN_EPSILON. Setting it to " + << "CUDNN_BN_MIN_EPSILON instead."; + } + epsilon = std::max(epsilon, CUDNN_BN_MIN_EPSILON); + + // Get the size for each dimension. 
+ // NHWC [batch_size, in_height, in_width, in_channels] + const auto *x = ctx.Input("X"); + const auto *z = ctx.Input("Z"); + const auto &in_dims = x->dims(); + + const auto *scale = ctx.Input("Scale"); + const auto *bias = ctx.Input("Bias"); + + auto *mean_out = ctx.Output("MeanOut"); + auto *variance_out = ctx.Output("VarianceOut"); + mean_out->mutable_data>(ctx.GetPlace()); + variance_out->mutable_data>(ctx.GetPlace()); + + auto *saved_mean = ctx.Output("SavedMean"); + auto *saved_variance = ctx.Output("SavedVariance"); + saved_mean->mutable_data>(ctx.GetPlace()); + saved_variance->mutable_data>(ctx.GetPlace()); + + auto *y = ctx.Output("Y"); + y->mutable_data(ctx.GetPlace()); + + int N, C, H, W, D; + const DataLayout data_layout = DataLayout::kNHWC; + ExtractNCWHD(in_dims, data_layout, &N, &C, &H, &W, &D); + + auto &dev_ctx = ctx.template device_context(); + + // ------------------- cudnn descriptors --------------------- + auto handle = dev_ctx.cudnn_handle(); + cudnnTensorDescriptor_t data_desc_; + cudnnTensorDescriptor_t bn_param_desc_; + cudnnBatchNormMode_t mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT; + + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnCreateTensorDescriptor(&data_desc_)); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_)); + + std::vector dims = {N, C, H, W, D}; + std::vector strides = {H * W * D * C, 1, W * D * C, D * C, C}; + + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( + data_desc_, CudnnDataType::type, + in_dims.size() > 3 ? in_dims.size() : 4, dims.data(), strides.data())); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnDeriveBNTensorDescriptor(bn_param_desc_, + data_desc_, mode_)); + + double this_factor = 1. 
- momentum; + cudnnBatchNormOps_t bnOps_ = CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION; + platform::ScopedActivationDescriptor scope_act_desc; + cudnnActivationDescriptor_t activation_desc_ = + scope_act_desc.descriptor(act_type); + size_t workspace_size = 0; + size_t reserve_space_size = 0; + void *reserve_space_ptr = nullptr; + void *workspace_ptr = nullptr; + Tensor workspace_tensor; + // Create reserve space and workspace for batch norm. + // Create tensor for each batchnorm op, it will be used in the + // backward. Thus this tensor shouldn't be temp. + auto *reserve_space = ctx.Output("ReserveSpace"); + PADDLE_ENFORCE_NOT_NULL( + reserve_space, + platform::errors::NotFound( + "The argument ReserveSpace of batch_norm op is not found.")); + + // --------------- cudnn batchnorm workspace --------------- + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload:: + cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize( + /*handle=*/handle, + /*mode=*/mode_, + /*bnOps=*/bnOps_, + /*xDesc=*/data_desc_, + /*zDesc=*/data_desc_, + /*yDesc=*/data_desc_, + /*bnScaleBiasMeanVarDesc=*/bn_param_desc_, + /*activationDesc=*/activation_desc_, + /*sizeInBytes=*/&workspace_size)); + + // -------------- cudnn batchnorm reserve space -------------- + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnGetBatchNormalizationTrainingExReserveSpaceSize( + /*handle=*/handle, + /*mode=*/mode_, + /*bnOps=*/bnOps_, + /*activationDesc=*/activation_desc_, + /*xDesc=*/data_desc_, + /*sizeInBytes=*/&reserve_space_size)); + + reserve_space_ptr = reserve_space->mutable_data(ctx.GetPlace(), x->type(), + reserve_space_size); + workspace_ptr = workspace_tensor.mutable_data(ctx.GetPlace(), x->type(), + workspace_size); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnBatchNormalizationForwardTrainingEx( + handle, mode_, bnOps_, CudnnDataType::kOne(), + CudnnDataType::kZero(), data_desc_, x->template data(), + data_desc_, z->template data(), data_desc_, + y->template data(), bn_param_desc_, + 
scale->template data>(), + bias->template data>(), this_factor, + mean_out->template mutable_data>( + ctx.GetPlace()), + variance_out->template mutable_data>( + ctx.GetPlace()), + epsilon, saved_mean->template mutable_data>( + ctx.GetPlace()), + saved_variance->template mutable_data>( + ctx.GetPlace()), + activation_desc_, workspace_ptr, workspace_size, reserve_space_ptr, + reserve_space_size)); + + // clean when exit. + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnDestroyTensorDescriptor(data_desc_)); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_)); + } +}; + +template +class FusedBatchNormAddActGradKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), true, + platform::errors::PreconditionNotMet("It must use CUDAPlace.")); + double epsilon = static_cast(ctx.Attr("epsilon")); + std::string act_type = ctx.Attr("act_type"); + + const auto *x = ctx.Input("X"); + const auto *z = ctx.Input("Z"); + const auto *y = ctx.Input("Y"); + const auto *d_y = ctx.Input(framework::GradVarName("Y")); + const auto *scale = ctx.Input("Scale"); + const auto *bias = ctx.Input("Bias"); + const auto *reserve_space = ctx.Input("ReserveSpace"); + + const auto &in_dims = x->dims(); + + int N, C, H, W, D; + const DataLayout data_layout = DataLayout::kNHWC; + ExtractNCWHD(in_dims, data_layout, &N, &C, &H, &W, &D); + + // init output + auto *d_x = ctx.Output(framework::GradVarName("X")); + auto *d_z = ctx.Output(framework::GradVarName("Z")); + auto *d_scale = ctx.Output(framework::GradVarName("Scale")); + auto *d_bias = ctx.Output(framework::GradVarName("Bias")); + + d_x->mutable_data(ctx.GetPlace()); + d_z->mutable_data(ctx.GetPlace()); + PADDLE_ENFORCE_EQ( + d_scale && d_bias, true, + platform::errors::PreconditionNotMet( + "Both the scale grad and the bias grad must not be null.")); + 
d_scale->mutable_data>(ctx.GetPlace()); + d_bias->mutable_data>(ctx.GetPlace()); + PADDLE_ENFORCE_EQ(scale->dims().size(), 1UL, + platform::errors::PreconditionNotMet( + "The scale only has one dimension.")); + PADDLE_ENFORCE_EQ( + scale->dims()[0], C, + platform::errors::PreconditionNotMet( + "The size of scale is equal to the channel of Input(X).")); + + auto &dev_ctx = ctx.template device_context(); + + std::vector dims = {N, C, H, W, D}; + std::vector strides = {H * W * C * D, 1, W * D * C, D * C, C}; + // ------------------- cudnn descriptors --------------------- + cudnnTensorDescriptor_t data_desc_; + cudnnTensorDescriptor_t bn_param_desc_; + cudnnBatchNormMode_t mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT; + + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnCreateTensorDescriptor(&data_desc_)); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_)); + if (epsilon <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) { + LOG(ERROR) << "Provided epsilon is smaller than " + << "CUDNN_BN_MIN_EPSILON. Setting it to " + << "CUDNN_BN_MIN_EPSILON instead."; + } + epsilon = std::max(epsilon, CUDNN_BN_MIN_EPSILON); + + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor( + data_desc_, CudnnDataType::type, + in_dims.size() > 3 ? 
in_dims.size() : 4, dims.data(), strides.data())); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnDeriveBNTensorDescriptor(bn_param_desc_, + data_desc_, mode_)); + + const auto *saved_mean = ctx.Input("SavedMean"); + const auto *saved_var = ctx.Input("SavedVariance"); + const auto *saved_mean_data = + saved_mean->template data>(); + const auto *saved_var_data = + saved_var->template data>(); + + size_t workspace_size = 0; + void *workspace_ptr = nullptr; + Tensor workspace_tensor; + auto reserve_space_size = reserve_space->memory_size(); + cudnnBatchNormOps_t bnOps_ = CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION; + platform::ScopedActivationDescriptor scope_act_desc; + cudnnActivationDescriptor_t activation_desc_ = + scope_act_desc.descriptor(act_type); + // --------------- cudnn batchnorm workspace --------------- + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnGetBatchNormalizationBackwardExWorkspaceSize( + /*handle=*/dev_ctx.cudnn_handle(), + /*mode=*/mode_, + /*bnOps=*/bnOps_, + /*xDesc=*/data_desc_, + /*yDesc=*/data_desc_, + /*dyDesc=*/data_desc_, + /*dzDesc=*/data_desc_, + /*dxDesc=*/data_desc_, + /*bnScaleBiasMeanVarDesc=*/bn_param_desc_, + /*activationDesc=*/activation_desc_, + /*sizeInBytes=*/&workspace_size)); + + workspace_ptr = workspace_tensor.mutable_data(ctx.GetPlace(), x->type(), + workspace_size); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnBatchNormalizationBackwardEx( + /*handle=*/dev_ctx.cudnn_handle(), + /*mode=*/mode_, + /*bnOps=*/bnOps_, + /*alphaDataDiff=*/CudnnDataType::kOne(), + /*betaDataDiff=*/CudnnDataType::kZero(), + /*alphaParamDiff=*/CudnnDataType::kOne(), + /*betaParamDiff=*/CudnnDataType::kZero(), + /*xDesc=*/data_desc_, + /*xData=*/x->template data(), + /*yDesc=*/data_desc_, + /*yData=*/y->template data(), + /*dyDesc=*/data_desc_, + /*dyData=*/d_y->template data(), + /*dzDesc=*/data_desc_, + /*dzData=*/d_z->template data(), + /*dxDesc=*/data_desc_, + /*dxData=*/d_x->template data(), + 
/*dBnScaleBiasDesc=*/bn_param_desc_, + /*bnScaleData=*/scale->template data>(), + /*bnBiasData=*/bias->template data>(), + /*dBnScaleData=*/d_scale->template data>(), + /*dBnBiasData=*/d_bias->template data>(), + /*epsilon=*/epsilon, + /*savedMean=*/saved_mean_data, + /*savedInvVariance=*/saved_var_data, + /*activationDesmc=*/activation_desc_, + /*workspace=*/workspace_ptr, + /*workSpaceSizeInBytes=*/workspace_size, + /*reserveSpace=*/const_cast(reserve_space->template data()), + /*reserveSpaceSizeInBytes=*/reserve_space_size)); + + // clean when exit. + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnDestroyTensorDescriptor(data_desc_)); + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_)); + } +}; + +} // namespace operators +} // namespace paddle + +#if CUDNN_VERSION >= 7401 +namespace ops = paddle::operators; +namespace plat = paddle::platform; +REGISTER_OP_CUDA_KERNEL( + fused_bn_add_activation, + ops::FusedBatchNormAddActKernel); +REGISTER_OP_CUDA_KERNEL(fused_bn_add_activation_grad, + ops::FusedBatchNormAddActGradKernel< + plat::CUDADeviceContext, plat::float16>); +#endif diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.h b/paddle/fluid/operators/fused/fused_bn_add_activation_op.h new file mode 100644 index 00000000000..5c7df96e60d --- /dev/null +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.h @@ -0,0 +1,106 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include "paddle/fluid/framework/grad_op_desc_maker.h" +#include "paddle/fluid/framework/op_proto_maker.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/framework/var_type_inference.h" + +namespace paddle { +namespace operators { +using Tensor = framework::Tensor; + +class FusedBatchNormAddActOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override; + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override; +}; + +class FusedBatchNormAddActGradOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + void InferShape(framework::InferShapeContext* ctx) const override; + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override; +}; + +class FusedBatchNormAddActOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override; +}; + +template +class FusedBatchNormAddActGradOpMaker : public framework::SingleGradOpMaker { + public: + using framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op) const override { + op->SetType(this->ForwardOpType() + "_grad"); + op->SetInput("X", this->Input("X")); + op->SetInput("Z", this->Input("Z")); + op->SetInput("Y", this->Output("Y")); + op->SetInput(framework::GradVarName("Y"), this->OutputGrad("Y")); + + op->SetInput("Scale", this->Input("Scale")); + op->SetInput("Bias", this->Input("Bias")); + op->SetInput("SavedMean", this->Output("SavedMean")); + op->SetInput("SavedVariance", this->Output("SavedVariance")); + op->SetInput("ReserveSpace", 
this->Output("ReserveSpace")); + + op->SetAttrMap(this->Attrs()); + + op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); + op->SetOutput(framework::GradVarName("Z"), this->InputGrad("Z")); + op->SetOutput(framework::GradVarName("Scale"), this->InputGrad("Scale")); + op->SetOutput(framework::GradVarName("Bias"), this->InputGrad("Bias")); + } +}; + +class FusedBatchNormAddActOpInferVarType + : public framework::PassInDtypeAndVarTypeToOutput { + protected: + std::unordered_map& GetInputOutputWithSameType() + const override { + static std::unordered_map m{{"X", /*->*/ "Y"}}; + return m; + } +}; + +template +class FusedBatchNormAddActKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override; +}; + +template +class FusedBatchNormAddActGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override; +}; + +} // namespace operators +} // namespace paddle diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py index 7b564b3f837..ac6493b1c29 100644 --- a/python/paddle/fluid/contrib/layers/nn.py +++ b/python/paddle/fluid/contrib/layers/nn.py @@ -45,6 +45,7 @@ from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer from paddle.fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype, convert_dtype from paddle.fluid import core +from paddle.fluid.param_attr import ParamAttr from paddle.fluid.entry_attr import ProbabilityEntry, CountFilterEntry from paddle.fluid.framework import Variable, convert_np_dtype_to_dtype_ @@ -57,7 +58,7 @@ __all__ = [ 'multiclass_nms2', 'search_pyramid_hash', 'shuffle_batch', 'partial_concat', 'sparse_embedding', 'partial_sum', 'tdm_child', 'rank_attention', 'tdm_sampler', 'batch_fc', '_pull_box_extended_sparse', 'bilateral_slice', - 'correlation' + 'correlation', 'fused_bn_add_act' ] @@ -1625,3 +1626,191 @@ def correlation(x, }, 
outputs={"Output": output}) return output + + +def fused_bn_add_act(x, + y, + momentum=0.9, + epsilon=1e-05, + param_attr=None, + bias_attr=None, + moving_mean_name=None, + moving_variance_name=None, + act=None, + name=None): + """ + This Op performs batch norm on input x, and adds the result to input y. Then + it performs activation on the sum. The data format of inputs must be NHWC + `[batch, in_height, in_width, in_channels]`. + + Args: + x(Tensor): The rank of input tensor can be 2, 3, 4, 5. The data type + is float16. + y(Tensor): The rank of input tensor can be 2, 3, 4, 5. The data type + is float16. + momentum(float|Tensor, optional): The value used for the moving_mean and + moving_var computation. This should be a float number or a tensor with + shape [1] and data type as float32. The updated formula is: + :math:`moving\_mean = moving\_mean * momentum + new\_mean * (1. - momentum)` + :math:`moving\_var = moving\_var * momentum + new\_var * (1. - momentum)` + Default is 0.9. + epsilon(float, optional): A value added to the denominator for + numerical stability. Default is 1e-5. + param_attr(ParamAttr, optional): The parameter attribute for Parameter `scale` + of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm + will create ParamAttr as param_attr, the name of scale can be set in ParamAttr. + If the Initializer of the param_attr is not set, the parameter is initialized + with Xavier. Default: None. + bias_attr(ParamAttr, optional): The parameter attribute for the bias of batch_norm. + If it is set to None or one attribute of ParamAttr, batch_norm + will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr. + If the Initializer of the bias_attr is not set, the bias is initialized zero. + Default: None. + moving_mean_name(str, optional): The name of moving_mean which store the global Mean. 
If it + is set to None, batch_norm will save global mean with a random name, otherwise, batch_norm + will save global mean with the string. + moving_variance_name(str, optional): The name of the moving_variance which store the global Variance. + If it is set to None, batch_norm will save global variance with a random name, otherwise, batch_norm + will save global variance with the string. + act(string, optional): Activation type, linear|relu|prelu|... + name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. + Usually name is no need to set and None by default. + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + + def build_program(main_program, startup_program): + with fluid.program_guard(main_program, startup_program): + x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32') + y = fluid.layers.data(name="y", shape=[1], dtype='int64') + conv1_1 = fluid.layers.conv2d( + input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + bias_attr=False, + data_format='NHWC') + conv1_2 = fluid.layers.conv2d( + input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + bias_attr=False, + data_format='NHWC') + bn = fluid.layers.batch_norm( + input=conv1_1, + act=None, + data_layout='NHWC') + fused_bn_add_act = fluid.contrib.layers.fused_bn_add_act(conv1_2, bn) + prediction = fluid.layers.fc(input=fused_bn_add_act, size=10, act='softmax') + loss = fluid.layers.cross_entropy(input=prediction, label=y) + loss = fluid.layers.mean(loss) + sgd = fluid.optimizer.SGD(learning_rate=0.001) + sgd = fluid.contrib.mixed_precision.decorate( + sgd, use_dynamic_loss_scaling=True, init_loss_scaling=128.0) + sgd.minimize(loss) + + return x, y, loss + + iters = 5 + batch_size = 16 + support_gpu = fluid.is_compiled_with_cuda() + if support_gpu: + main_program = fluid.Program() + startup_program = fluid.Program() + place = fluid.CUDAPlace(0) + x, y, loss = build_program(main_program, 
startup_program) + + feeder = fluid.DataFeeder(feed_list=[x, y], place=place) + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=batch_size) + exe = fluid.Executor(place) + scope = fluid.Scope() + with fluid.scope_guard(scope): + exe.run(startup_program) + for _ in range(iters): + data = next(train_reader()) + loss_v = exe.run(main_program, feed=feeder.feed(data), fetch_list=[loss]) + """ + helper = LayerHelper('fused_bn_add_act', **locals()) + + check_variable_and_dtype(x, 'input', ['float16', 'float32', 'float64'], + 'fused_bn_add_act') + check_variable_and_dtype(y, 'input', ['float16', 'float32', 'float64'], + 'fused_bn_add_act') + bn_param_dtype = core.VarDesc.VarType.FP32 + + x_shape = x.shape + channel_num = x_shape[-1] + param_shape = [channel_num] + + # create parameter + scale = helper.create_parameter( + attr=helper.param_attr, + shape=param_shape, + dtype=bn_param_dtype, + default_initializer=Constant(1.0)) + bias = helper.create_parameter( + attr=helper.bias_attr, + shape=param_shape, + dtype=bn_param_dtype, + is_bias=True) + mean = helper.create_parameter( + attr=ParamAttr( + name=moving_mean_name, initializer=Constant(0.0), trainable=False), + shape=param_shape, + dtype=bn_param_dtype) + mean.stop_gradient = True + variance = helper.create_parameter( + attr=ParamAttr( + name=moving_variance_name, + initializer=Constant(1.0), + trainable=False), + shape=param_shape, + dtype=bn_param_dtype) + variance.stop_gradient = True + + # create output + # mean and mean_out share the same memory + mean_out = mean + # variance and variance out share the same memory + variance_out = variance + saved_mean = helper.create_variable_for_type_inference( + dtype=bn_param_dtype, stop_gradient=True) + saved_variance = helper.create_variable_for_type_inference( + dtype=bn_param_dtype, stop_gradient=True) + reserve_space = helper.create_variable_for_type_inference( + dtype=core.VarDesc.VarType.FP16, stop_gradient=True) + batch_norm_out = 
helper.create_variable_for_type_inference( + core.VarDesc.VarType.FP16) + + inputs = { + "X": x, + "Z": y, + "Scale": scale, + "Bias": bias, + } + attrs = {"epsilon": epsilon, 'momentum': momentum} + + outputs = { + "Y": batch_norm_out, + "MeanOut": mean_out, + "VarianceOut": variance_out, + "SavedMean": saved_mean, + "SavedVariance": saved_variance, + "ReserveSpace": reserve_space + } + + helper.append_op( + type="fused_bn_add_activation", + inputs=inputs, + outputs=outputs, + attrs=attrs) + + return batch_norm_out diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py index 1f301b7148d..a9f080c514d 100644 --- a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py +++ b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py @@ -135,6 +135,7 @@ gray_list = { 'get_tensor_from_selected_rows', 'sign', 'cast', + 'fused_bn_add_activation', } ''' # The set of ops that don't support fp16 calculation diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py index 0b142ff33de..0ff166d8dc8 100644 --- a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py +++ b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py @@ -69,8 +69,10 @@ def _insert_cast_op(block, op, idx, src_dtype, dest_dtype): ] for in_name in op.input_names: - if src_dtype == core.VarDesc.VarType.FP32 and op.type == 'batch_norm': - if in_name != 'X': + if src_dtype == core.VarDesc.VarType.FP32 and op.type in [ + 'batch_norm', 'fused_bn_add_activation' + ]: + if in_name not in {'X', 'Z'}: continue for in_var_name in op.input(in_name): in_var = block.var(in_var_name) @@ -102,7 +104,8 @@ def _insert_cast_op(block, op, idx, src_dtype, dest_dtype): op._set_attr('in_dtype', dest_dtype) if src_dtype == core.VarDesc.VarType.FP32 and dest_dtype == core.VarDesc.VarType.FP16: for out_name in op.output_names: - if op.type == 'batch_norm' and out_name != 'Y': 
+ if op.type in ['batch_norm', 'fused_bn_add_activation' + ] and out_name != 'Y': continue for out_var_name in op.output(out_name): out_var = block.var(out_var_name) diff --git a/python/paddle/fluid/tests/unittests/test_fused_bn_add_act.py b/python/paddle/fluid/tests/unittests/test_fused_bn_add_act.py new file mode 100644 index 00000000000..1bc305cd1f4 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_fused_bn_add_act.py @@ -0,0 +1,215 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import unittest +import numpy as np +from op_test import OpTest +import paddle +import paddle.fluid as fluid +from paddle.fluid import core + + +@unittest.skipIf(not core.is_compiled_with_cuda(), + "Paddle core is not compiled with CUDA") +class TestFusedBnAddActAPI(unittest.TestCase): + def setUp(self): + self.conv_param_attr1 = fluid.ParamAttr( + name='conv2d_1.weight', + initializer=fluid.initializer.Xavier(uniform=False), + learning_rate=0.001) + self.conv_param_attr2 = fluid.ParamAttr( + name='conv2d_2.weight', + initializer=fluid.initializer.Xavier(uniform=False), + learning_rate=0.001) + self.bn_param_attr1 = fluid.ParamAttr( + name='batch_norm_w_1', + initializer=fluid.initializer.Constant(value=1.0)) + self.bn_bias_attr1 = fluid.ParamAttr( + name='batch_norm_b_1', + initializer=fluid.initializer.Constant(value=0.0)) + self.bn_param_attr2 = fluid.ParamAttr( + name='batch_norm_w_2', + initializer=fluid.initializer.Constant(value=1.0)) + self.bn_bias_attr2 = fluid.ParamAttr( + name='batch_norm_b_2', + initializer=fluid.initializer.Constant(value=0.0)) + self.fc_param_attr = fluid.ParamAttr( + name='fc.weight', + initializer=fluid.initializer.Xavier(uniform=False)) + + def build_fused_program(self, + main_program, + startup_program, + use_cuda, + seed=1): + with fluid.program_guard(main_program, startup_program): + x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32') + y = fluid.layers.data(name="y", shape=[1], dtype='int64') + conv1_1 = fluid.layers.conv2d( + input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + param_attr=self.conv_param_attr1, + bias_attr=False, + data_format='NHWC') + conv1_2 = fluid.layers.conv2d( + input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + param_attr=self.conv_param_attr2, + bias_attr=False, + data_format='NHWC') + bn = fluid.layers.batch_norm( + input=conv1_1, + param_attr=self.bn_param_attr1, + 
bias_attr=self.bn_bias_attr1, + act=None, + data_layout='NHWC') + fused_bn_add_act = fluid.contrib.layers.fused_bn_add_act( + conv1_2, + bn, + param_attr=self.bn_param_attr2, + bias_attr=self.bn_bias_attr2) + prediction = fluid.layers.fc(input=fused_bn_add_act, + size=10, + act='softmax', + param_attr=self.fc_param_attr) + loss = fluid.layers.cross_entropy(input=prediction, label=y) + loss = fluid.layers.mean(loss) + sgd = fluid.optimizer.SGD(learning_rate=0.001) + sgd = fluid.contrib.mixed_precision.decorate( + sgd, use_dynamic_loss_scaling=True, init_loss_scaling=128.0) + sgd.minimize(loss) + + return x, y, loss + + def build_origin_program(self, + main_program, + startup_program, + use_cuda, + seed=1): + with fluid.program_guard(main_program, startup_program): + x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32') + y = fluid.layers.data(name="y", shape=[1], dtype='int64') + conv1_1 = fluid.layers.conv2d( + input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + param_attr=self.conv_param_attr1, + bias_attr=False, + data_format='NHWC') + conv1_2 = fluid.layers.conv2d( + input=x, + filter_size=3, + num_filters=32, + stride=1, + padding=1, + act=None, + param_attr=self.conv_param_attr2, + bias_attr=False, + data_format='NHWC') + bn1 = fluid.layers.batch_norm( + input=conv1_1, + param_attr=self.bn_param_attr1, + bias_attr=self.bn_bias_attr1, + act=None, + data_layout='NHWC') + bn2 = fluid.layers.batch_norm( + input=conv1_2, + param_attr=self.bn_param_attr2, + bias_attr=self.bn_bias_attr2, + act=None, + data_layout='NHWC') + out = bn1 + bn2 + out = fluid.layers.relu(out) + prediction = fluid.layers.fc(input=out, + size=10, + act='softmax', + param_attr=self.fc_param_attr) + loss = fluid.layers.cross_entropy(input=prediction, label=y) + loss = fluid.layers.mean(loss) + sgd = fluid.optimizer.SGD(learning_rate=0.001) + sgd = fluid.contrib.mixed_precision.decorate( + sgd, use_dynamic_loss_scaling=True, init_loss_scaling=128.0) + 
sgd.minimize(loss) + + return x, y, loss + + def check(self, place, use_cuda): + paddle.manual_seed(1) + paddle.framework.random._manual_program_seed(1) + iters = 5 + batch_size = 16 + + # build_fused_program + main_program = fluid.Program() + startup_program = fluid.Program() + x, y, loss = self.build_fused_program(main_program, startup_program, + use_cuda) + feeder = fluid.DataFeeder(feed_list=[x, y], place=place) + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=batch_size) + exe = fluid.Executor(place) + loss_vals_fused = [] + scope = fluid.Scope() + with fluid.scope_guard(scope): + exe.run(startup_program) + for _ in range(iters): + data = next(train_reader()) + loss_v = exe.run(main_program, + feed=feeder.feed(data), + fetch_list=[loss]) + loss_vals_fused.append(loss_v[0][0]) + + # build_origin_program + main_program = fluid.Program() + startup_program = fluid.Program() + x, y, loss = self.build_origin_program(main_program, startup_program, + use_cuda) + feeder = fluid.DataFeeder(feed_list=[x, y], place=place) + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=batch_size) + loss_vals = [] + scope = fluid.Scope() + with fluid.scope_guard(scope): + exe.run(startup_program) + for _ in range(iters): + data = next(train_reader()) + loss_v = exe.run(main_program, + feed=feeder.feed(data), + fetch_list=[loss]) + loss_vals.append(loss_v[0][0]) + + # check loss + for i in range(iters): + self.assertAlmostEqual(loss_vals[i], loss_vals_fused[i], delta=1e-5) + + def test_fuse_bn_add_act(self): + place = fluid.CUDAPlace(0) + self.check(place, use_cuda=True) + + +if __name__ == '__main__': + unittest.main() -- GitLab From 43240a1b814a49e6b02c3b1c49249a9b6f7fe2c5 Mon Sep 17 00:00:00 2001 From: cc <52520497+juncaipeng@users.noreply.github.com> Date: Wed, 23 Sep 2020 10:42:51 +0800 Subject: [PATCH 047/117] [doc] Add example for cache and buffered (#26819) * Add example for cache and buffered, test=develop, test=document_fix --- 
python/paddle/reader/decorator.py | 42 +++++++++++++++++++++++++++---- tools/wlist.json | 2 -- 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/python/paddle/reader/decorator.py b/python/paddle/reader/decorator.py index 91a2a78203c..8ee4d73ea84 100644 --- a/python/paddle/reader/decorator.py +++ b/python/paddle/reader/decorator.py @@ -62,6 +62,22 @@ def cache(reader): Returns: generator: a decorated reader object which yields data from cached memory. + + Examples: + .. code-block:: python + + import paddle + + def reader(): + for i in range(3): + yield i + + # All data is cached into memory + cached_reader = paddle.io.cache(reader) + + # Output: 0 1 2 + for i in cached_reader(): + print(i) """ all_data = tuple(reader()) @@ -296,12 +312,28 @@ def buffered(reader, size): buffer. Reading from the buffered data reader will proceed as long as the buffer is not empty. - :param reader: the data reader to read from. - :type reader: callable - :param size: max buffer size. - :type size: int + Args: + reader(generator): the data reader to read from. + size(int): max buffer size. + + Returns: + generator: the buffered data reader. + + Examples: + .. code-block:: python - :returns: the buffered data reader. + import paddle + + def reader(): + for i in range(3): + yield i + + # Create a buffered reader, and the buffer size is 2. 
+ buffered_reader = paddle.io.buffered(reader, 2) + + # Output: 0 1 2 + for i in buffered_reader(): + print(i) """ class EndSignal(): diff --git a/tools/wlist.json b/tools/wlist.json index 20f6a9cbaed..5591f90da4b 100644 --- a/tools/wlist.json +++ b/tools/wlist.json @@ -105,8 +105,6 @@ "convert_dist_to_sparse_program", "load_persistables_for_increment", "load_persistables_for_inference", - "cache", - "buffered", "xmap_readers", "Metric.reset", "Metric.update", -- GitLab From 41b59555387616edef6bd5ef1b9093ab92b90db1 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Wed, 23 Sep 2020 10:55:01 +0800 Subject: [PATCH 048/117] Polish no onwer ops error message (#27448) * polish no onwer op error message * fix unittest failed * polish details based reviewer comment --- .../operators/add_position_encoding_op.cc | 10 +++- paddle/fluid/operators/assign_value_op.h | 5 +- paddle/fluid/operators/coalesce_tensor_op.cc | 60 ++++++++++++------- .../fluid/operators/dequantize_abs_max_op.cc | 6 +- paddle/fluid/operators/detection/gpc.cc | 30 +++++++--- .../distributed_ops/fetch_barrier_op.cc | 4 +- .../distributed_ops/send_recv_util.h | 10 ++-- paddle/fluid/operators/gru_unit_op.h | 8 ++- paddle/fluid/operators/interpolate_op.cc | 47 ++++++++------- paddle/fluid/operators/merge_lod_tensor_op.cc | 46 ++++++++------ paddle/fluid/operators/strided_memcpy.h | 32 +++++++--- paddle/fluid/operators/var_conv_2d_op.cc | 38 ++++++++---- 12 files changed, 193 insertions(+), 103 deletions(-) diff --git a/paddle/fluid/operators/add_position_encoding_op.cc b/paddle/fluid/operators/add_position_encoding_op.cc index 629fedba6e3..e5fcd270eb8 100644 --- a/paddle/fluid/operators/add_position_encoding_op.cc +++ b/paddle/fluid/operators/add_position_encoding_op.cc @@ -69,12 +69,18 @@ class AddPositionEncodingOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr("alpha", "The scale of Original Embedding.") .SetDefault(1.0f) .AddCustomChecker([](const float& alpha) { - PADDLE_ENFORCE(alpha >= 0.0f, 
"'alpha' must be above 0.0."); + PADDLE_ENFORCE_GE( + alpha, 0.0f, + platform::errors::InvalidArgument( + "Attribute 'alpha' must be greater than or equal to 0.0.")); }); AddAttr("beta", "The scale of Position Embedding.") .SetDefault(1.0f) .AddCustomChecker([](const float& beta) { - PADDLE_ENFORCE(beta >= 0.0f, "'beta' must be between 0.0."); + PADDLE_ENFORCE_GE( + beta, 0.0f, + platform::errors::InvalidArgument( + "Attribute 'beta' must be greater than or equal to 0.0.")); }); AddComment(R"DOC( Add Position Encoding Operator. diff --git a/paddle/fluid/operators/assign_value_op.h b/paddle/fluid/operators/assign_value_op.h index b462c43d23a..1418d96b67b 100644 --- a/paddle/fluid/operators/assign_value_op.h +++ b/paddle/fluid/operators/assign_value_op.h @@ -76,7 +76,10 @@ class AssignValueKernel : public framework::OpKernel { value_name = "int64_values"; break; default: - PADDLE_THROW("Unsupported dtype for assign_value_op: %d", dtype); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported data type(code %d) for AssignValue operator, only " + "supports bool, int32, float32 and int64.", + dtype)); break; } CopyVecotorToTensor(value_name, out, ctx); diff --git a/paddle/fluid/operators/coalesce_tensor_op.cc b/paddle/fluid/operators/coalesce_tensor_op.cc index 5b7bcde21a9..d67d90c348e 100644 --- a/paddle/fluid/operators/coalesce_tensor_op.cc +++ b/paddle/fluid/operators/coalesce_tensor_op.cc @@ -33,29 +33,37 @@ class CoalesceTensorOpKernel : public framework::OpKernel { auto out_vars = context.MultiOutputVar("Output"); PADDLE_ENFORCE_GT(in_var_names.size(), static_cast(0), - "The CoalesceTensorOp has no input."); - PADDLE_ENFORCE_EQ( - in_var_names.size(), out_var_names.size(), - "The number of CoalesceTensorOp's input and output is not match."); + platform::errors::InvalidArgument( + "The CoalesceTensor operator has no input.")); + PADDLE_ENFORCE_EQ(in_var_names.size(), out_var_names.size(), + platform::errors::InvalidArgument( + "The number of CoalesceTensor 
operator's input and " + "output is not match, " + "input number is %u, output number is %u.", + in_var_names.size(), out_var_names.size())); // Input & Output check: only support LoDTensor for (size_t i = 0; i < in_var_names.size(); ++i) { PADDLE_ENFORCE_NOT_NULL( in_vars[i], - "The input variable %s of CoalesceTensorOp does not exist.", - in_var_names[i]); + platform::errors::NotFound("The input variable %s of CoalesceTensor " + "operator does not exist.", + in_var_names[i])); PADDLE_ENFORCE_NOT_NULL( out_vars[i], - "The output variable %s of CoalesceTensorOp does not exist.", - out_var_names[i]); - PADDLE_ENFORCE_EQ( - in_vars[i]->IsType(), true, - "The input variable %s of CoalesceTensorOp is not LoDTensor.", - in_var_names[i]); - PADDLE_ENFORCE_EQ( - out_vars[i]->IsType(), true, - "The output variable %s of CoalesceTensorOp is not LoDTensor.", - in_var_names[i]); + platform::errors::NotFound("The output variable %s of CoalesceTensor " + "operator does not exist.", + out_var_names[i])); + PADDLE_ENFORCE_EQ(in_vars[i]->IsType(), true, + platform::errors::InvalidArgument( + "The input variable %s of CoalesceTensor operator " + "is not LoDTensor.", + in_var_names[i])); + PADDLE_ENFORCE_EQ(out_vars[i]->IsType(), true, + platform::errors::InvalidArgument( + "The output variable %s of CoalesceTensor operator " + "is not LoDTensor.", + in_var_names[i])); } auto in_tensors = context.MultiInput("Input"); @@ -64,7 +72,10 @@ class CoalesceTensorOpKernel : public framework::OpKernel { for (size_t i = 0; i < in_var_names.size(); ++i) { PADDLE_ENFORCE_EQ( in_var_names[i], out_var_names[i], - "The input and output variable of CoalesceTensorOp is different."); + platform::errors::InvalidArgument( + "The input and output variable of CoalesceTensor operator is " + "different, %dth input is %s, %dth output is %s.", + i, in_var_names[i], i, out_var_names[i])); } } else { // Init the output as input @@ -134,16 +145,25 @@ class CoalesceTensorOpKernel : public framework::OpKernel { 
const std::vector &lod_tensors, const std::vector var_names, size_t *numel, const size_t &size_of_dtype, const platform::Place &place) const { - PADDLE_ENFORCE_EQ(lod_tensors.size(), var_names.size()); + PADDLE_ENFORCE_EQ( + lod_tensors.size(), var_names.size(), + platform::errors::InvalidArgument( + "The number of input tensor and variable does not match, the " + "number of input tensor is %u, the number of input variable is %u.", + lod_tensors.size(), var_names.size())); *numel = 0; std::stringstream ss; ss << "alloc_space_for_vars: "; for (size_t i = 0; i < var_names.size(); ++i) { PADDLE_ENFORCE_EQ(lod_tensors[i]->IsInitialized(), true, - "%s is not initialized.", var_names[i]); + platform::errors::InvalidArgument( + "Tensor `%s` is not initialized.", var_names[i])); auto size = lod_tensors[i]->numel(); - PADDLE_ENFORCE_GT(size, 0); + PADDLE_ENFORCE_GT( + size, 0, + platform::errors::InvalidArgument( + "The number of tensor `%s`'s elements is 0.", var_names[i])); ss << "input(" << var_names[i] << ") dim:(" << lod_tensors[i]->dims() << ") " << " addres:" << lod_tensors[i]->data() << ", "; diff --git a/paddle/fluid/operators/dequantize_abs_max_op.cc b/paddle/fluid/operators/dequantize_abs_max_op.cc index 48743f2e48c..0d4d68d9f62 100644 --- a/paddle/fluid/operators/dequantize_abs_max_op.cc +++ b/paddle/fluid/operators/dequantize_abs_max_op.cc @@ -45,10 +45,8 @@ class DequantizeMaxAbsOp : public framework::OperatorWithKernel { : OperatorWithKernel(type, inputs, outputs, attrs) {} void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - "Input(X) of DequantizeMaxAbsOp should not be null."); - PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, - "Output(Out) of DequantizeMaxAbsOp should not be null."); + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "DequantizeMaxAbs"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "DequantizeMaxAbs"); ctx->ShareDim("X", /*->*/ "Out"); ctx->ShareLoD("X", /*->*/ 
"Out"); diff --git a/paddle/fluid/operators/detection/gpc.cc b/paddle/fluid/operators/detection/gpc.cc index b46d231d0ff..6b1b0cd8b35 100644 --- a/paddle/fluid/operators/detection/gpc.cc +++ b/paddle/fluid/operators/detection/gpc.cc @@ -532,7 +532,8 @@ static int count_contours(polygon_node *polygon) { } static void add_left(polygon_node *p, double x, double y) { - PADDLE_ENFORCE_NOT_NULL(p); + PADDLE_ENFORCE_NOT_NULL(p, paddle::platform::errors::InvalidArgument( + "Input polygon node is nullptr.")); vertex_node *nv = NULL; /* Create a new vertex node and set its fields */ @@ -588,7 +589,8 @@ static void add_right(polygon_node *p, double x, double y) { } static void merge_right(polygon_node *p, polygon_node *q, polygon_node *list) { - PADDLE_ENFORCE_NOT_NULL(p); + PADDLE_ENFORCE_NOT_NULL(p, paddle::platform::errors::InvalidArgument( + "Input polygon node is nullptr.")); polygon_node *target = NULL; /* Label contour as external */ @@ -664,7 +666,8 @@ void add_vertex(vertex_node **t, double x, double y) { } void gpc_vertex_create(edge_node *e, int p, int s, double x, double y) { - PADDLE_ENFORCE_NOT_NULL(e); + PADDLE_ENFORCE_NOT_NULL(e, paddle::platform::errors::InvalidArgument( + "Input edge node is nullptr.")); add_vertex(&(e->outp[p]->v[s]), x, y); e->outp[p]->active++; } @@ -693,7 +696,8 @@ static bbox *create_contour_bboxes(gpc_polygon *p) { gpc_malloc(box, p->num_contours * sizeof(bbox), const_cast("Bounding box creation")); - PADDLE_ENFORCE_NOT_NULL(box); + PADDLE_ENFORCE_NOT_NULL(box, paddle::platform::errors::ResourceExhausted( + "Failed to malloc box memory.")); /* Construct contour bounding boxes */ for (c = 0; c < p->num_contours; c++) { @@ -857,7 +861,9 @@ void gpc_add_contour(gpc_polygon *p, gpc_vertex_list *new_contour, int hole) { /* Create an extended hole array */ gpc_malloc(extended_hole, (p->num_contours + 1) * sizeof(int), const_cast("contour hole addition")); - PADDLE_ENFORCE_NOT_NULL(extended_hole); + PADDLE_ENFORCE_NOT_NULL(extended_hole, + 
paddle::platform::errors::ResourceExhausted( + "Failed to malloc extended hole memory.")); /* Create an extended contour array */ gpc_malloc(extended_contour, @@ -975,7 +981,9 @@ void gpc_polygon_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip, /* Build scanbeam table from scanbeam tree */ gpc_malloc(sbt, sbt_entries * sizeof(double), const_cast("sbt creation")); - PADDLE_ENFORCE_NOT_NULL(sbt); + PADDLE_ENFORCE_NOT_NULL(sbt, paddle::platform::errors::ResourceExhausted( + "Failed to malloc scanbeam table memory.")); + build_sbt(&scanbeam, sbt, sbtree); scanbeam = 0; free_sbtree(&sbtree); @@ -1017,7 +1025,9 @@ void gpc_polygon_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip, e0 = aet; e1 = aet; /* Set up bundle fields of first edge */ - PADDLE_ENFORCE_NOT_NULL(aet); + PADDLE_ENFORCE_NOT_NULL(aet, paddle::platform::errors::InvalidArgument( + "Edge node AET is nullptr.")); + aet->bundle[ABOVE][aet->type] = (aet->top.y != yb); aet->bundle[ABOVE][!aet->type] = 0; aet->bstate[ABOVE] = UNBUNDLED; @@ -1612,7 +1622,8 @@ void gpc_tristrip_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip, /* Build scanbeam table from scanbeam tree */ gpc_malloc(sbt, sbt_entries * sizeof(double), const_cast("sbt creation")); - PADDLE_ENFORCE_NOT_NULL(sbt); + PADDLE_ENFORCE_NOT_NULL(sbt, paddle::platform::errors::ResourceExhausted( + "Failed to malloc scanbeam table memory.")); build_sbt(&scanbeam, sbt, sbtree); scanbeam = 0; free_sbtree(&sbtree); @@ -1650,7 +1661,8 @@ void gpc_tristrip_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip, e1 = aet; /* Set up bundle fields of first edge */ - PADDLE_ENFORCE_NOT_NULL(aet); + PADDLE_ENFORCE_NOT_NULL(aet, paddle::platform::errors::InvalidArgument( + "Edge node AET is nullptr.")); aet->bundle[ABOVE][aet->type] = (aet->top.y != yb); aet->bundle[ABOVE][!aet->type] = 0; aet->bstate[ABOVE] = UNBUNDLED; diff --git a/paddle/fluid/operators/distributed_ops/fetch_barrier_op.cc b/paddle/fluid/operators/distributed_ops/fetch_barrier_op.cc index 
b064265917b..c9f9daf3b3c 100644 --- a/paddle/fluid/operators/distributed_ops/fetch_barrier_op.cc +++ b/paddle/fluid/operators/distributed_ops/fetch_barrier_op.cc @@ -48,7 +48,9 @@ class FetchBarrierOp : public framework::OperatorBase { } for (size_t i = 0; i < rets.size(); i++) { - PADDLE_ENFORCE_NE(rets[i]->Wait(), 0U, "internal error in RPCClient"); + PADDLE_ENFORCE_NE(rets[i]->Wait(), 0U, + platform::errors::Unavailable( + "Internal error occurred in RPCClient.")); } } }; diff --git a/paddle/fluid/operators/distributed_ops/send_recv_util.h b/paddle/fluid/operators/distributed_ops/send_recv_util.h index c05a1ff1da8..7dc0596ac31 100644 --- a/paddle/fluid/operators/distributed_ops/send_recv_util.h +++ b/paddle/fluid/operators/distributed_ops/send_recv_util.h @@ -34,16 +34,16 @@ inline bool NeedSend(const framework::Scope& scope, std::string::npos) return false; auto* var = scope.FindVar(varname); - PADDLE_ENFORCE_NOT_NULL(var, "Can not find variable '%s' in the send side.", - varname); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound( + "Can not find variable '%s' in the send side.", varname)); if (var->IsType()) { return var->Get().IsInitialized(); } else if (var->IsType()) { return var->Get().rows().size() > 0UL; } else { - PADDLE_THROW( - "Variable type in send side should be in " - "[LodTensor, SelectedRows]"); + PADDLE_THROW(platform::errors::Unimplemented( + "Variable type in send side should be LodTensor or SelectedRows.")); } return false; } diff --git a/paddle/fluid/operators/gru_unit_op.h b/paddle/fluid/operators/gru_unit_op.h index 712ef05d863..4865a02c529 100644 --- a/paddle/fluid/operators/gru_unit_op.h +++ b/paddle/fluid/operators/gru_unit_op.h @@ -47,7 +47,9 @@ class GRUUnitKernel : public framework::OpKernel { else if (act_type == relu) ReluFunctor()(d, x, y); else - PADDLE_THROW("unsupported activation type"); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported activation type, only supports identity, sigmoid, tanh " + "and 
relu.")); } void Compute(const framework::ExecutionContext& context) const override { @@ -137,7 +139,9 @@ class GRUUnitGradKernel : public framework::OpKernel { else if (act_type == relu) ReluGradFunctor()(d, x, y, dy, dx); else - PADDLE_THROW("unsupported activation type"); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported activation type, only supports identity, sigmoid, tanh " + "and relu.")); } void Compute(const framework::ExecutionContext& context) const override { diff --git a/paddle/fluid/operators/interpolate_op.cc b/paddle/fluid/operators/interpolate_op.cc index 1e99e22e12b..e8a9ed878e9 100644 --- a/paddle/fluid/operators/interpolate_op.cc +++ b/paddle/fluid/operators/interpolate_op.cc @@ -104,12 +104,13 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { auto dim_x = ctx->GetInputDim("X"); auto interp_method = ctx->Attrs().Get("interp_method"); - PADDLE_ENFORCE( - "bilinear" == interp_method || "nearest" == interp_method || - "bicubic" == interp_method, - "Interpolation method can only be \"bilinear\" or \"nearest\" when " - "Input(X) dimension is 4, but got method = %s .", - interp_method); + PADDLE_ENFORCE_EQ("bilinear" == interp_method || "nearest" == interp_method || + "bicubic" == interp_method, + true, platform::errors::InvalidArgument( + "Interpolation method can only be \"bilinear\" " + "or \"nearest\" or \"bicubic\" when " + "Input(X) dimension is 4, but got method is %s.", + interp_method)); const DataLayout data_layout = framework::StringToDataLayout( ctx->Attrs().Get("data_layout")); @@ -169,13 +170,13 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { auto out_size_dim = ctx->GetInputDim("OutSize"); PADDLE_ENFORCE_EQ( out_size_dim.size(), 1, - platform::errors::InvalidArgument( - "OutSize's dimension size must be 1, but got dimension = %d .", - out_size_dim.size())); + platform::errors::InvalidArgument("OutSize's dimension size must be 1, " + "but got dimension size is 
%d .", + out_size_dim.size())); PADDLE_ENFORCE_EQ( out_size_dim[0], 2, platform::errors::InvalidArgument( - "OutSize's dim[0] must be 2, but got dimention = %d .", + "OutSize's dimension[0] must be 2, but got dimension[0] is %d .", out_size_dim[0])); ctx->ShareLoD("X", "Out"); return; @@ -264,12 +265,15 @@ static void Interpolate3DInferShapeCheck(framework::InferShapeContext* ctx) { if (ctx->HasInput("OutSize") && ctx->IsRuntime()) { auto out_size_dim = ctx->GetInputDim("OutSize"); - PADDLE_ENFORCE_EQ(out_size_dim.size(), 1, - "OutSize's dimension size must be 1, but got size =%d .", - out_size_dim.size()); + PADDLE_ENFORCE_EQ( + out_size_dim.size(), 1, + platform::errors::InvalidArgument( + "OutSize's dimension size must be 1, but got size is %d.", + out_size_dim.size())); PADDLE_ENFORCE_EQ(out_size_dim[0], 3, - "OutSize's dim[0] must be 3, but got size = %d .", - out_size_dim[0]); + platform::errors::InvalidArgument( + "OutSize's dim[0] must be 3, but got size is %d.", + out_size_dim[0])); ctx->ShareLoD("X", "Out"); return; } @@ -289,10 +293,8 @@ class InterpolateOp : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), - "Input(X) of InterpolateOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Out"), - "Output(Out) of InterpolationOp should not be null."); + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Interpolate"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "Interpolate"); auto dim_x = ctx->GetInputDim("X"); // NCHW format PADDLE_ENFORCE( @@ -534,9 +536,10 @@ class InterpolateOpGrad : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null"); - PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")), - "Input(Out@GRAD) should not be null"); + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", 
"InterpolateGrad"); + OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), "Input", + "Out@GRAD", "InterpolateGrad"); + auto dim_x = ctx->GetInputDim("X"); if (ctx->HasOutput(framework::GradVarName("X"))) { ctx->SetOutputDim(framework::GradVarName("X"), dim_x); diff --git a/paddle/fluid/operators/merge_lod_tensor_op.cc b/paddle/fluid/operators/merge_lod_tensor_op.cc index c9b852cfc05..87d914aa797 100644 --- a/paddle/fluid/operators/merge_lod_tensor_op.cc +++ b/paddle/fluid/operators/merge_lod_tensor_op.cc @@ -44,8 +44,10 @@ class MergeLoDTensorOp : public framework::OperatorBase { scope.FindVar(Output("Out"))->GetMutable(); auto level = static_cast(Attr("level")); - PADDLE_ENFORCE(in_true.numel() || in_false.numel(), - "Input(InTrue) or Input(InFalse) should be initialized."); + PADDLE_ENFORCE_EQ( + in_true.numel() || in_false.numel(), true, + platform::errors::InvalidArgument( + "Input(InTrue) or Input(InFalse) should be initialized.")); auto &mask_dim = mask.dims(); std::unique_ptr cpu_mask{new framework::LoDTensor()}; @@ -56,7 +58,9 @@ class MergeLoDTensorOp : public framework::OperatorBase { framework::TensorCopy(mask, platform::CPUPlace(), dev_ctx, cpu_mask.get()); #else - PADDLE_THROW("Not supported GPU, Please compile WITH_GPU option"); + PADDLE_THROW(platform::errors::PreconditionNotMet( + "Not supported GPU, Please recompile or reinstall paddle with CUDA " + "support.")); #endif } auto *mask_data = cpu_mask->data(); @@ -109,7 +113,11 @@ class MergeLoDTensorOp : public framework::OperatorBase { size_t start_offset = lod_and_offset.second.first; size_t end_offset = lod_and_offset.second.second; - PADDLE_ENFORCE_GE(end_offset, start_offset); + PADDLE_ENFORCE_GE(end_offset, start_offset, + platform::errors::InvalidArgument( + "The end offset less than start offset, end offset " + "is %d, start offset is %d.", + end_offset, start_offset)); size_t len = end_offset - start_offset; if (len == 0) { continue; @@ -189,22 +197,24 @@ class 
MergeLoDTensorInferShape : public framework::InferShapeBase { "merge_lod_tensor"); auto mask_dim = context->GetInputDim("Mask"); PADDLE_ENFORCE_EQ(mask_dim.size(), 2, - "If you are using IfElse OP:" - "\n\nie = fluid.layers.IfElse(cond=cond)\nwith " - "ie.true_block():\n out_1 = ie.input(x)\n\n" - "Please ensure that the cond should be a 2-D tensor and " - "the second dim size of cond should be 1. " - "But now the cond's shape is [", - *mask_dim.Get(), "].\n"); + platform::errors::InvalidArgument( + "If you are using IfElse OP:" + "\n\nie = fluid.layers.IfElse(cond=cond)\nwith " + "ie.true_block():\n out_1 = ie.input(x)\n\n" + "Please ensure that the cond is a 2-D tensor and " + "the second dim size of cond is 1. " + "But now the cond's shape is [%s].\n", + mask_dim)); if (context->IsRuntime() || mask_dim[1] > 0) { PADDLE_ENFORCE_EQ(mask_dim[1], 1, - "If you are using IfElse OP:" - "\n\nie = fluid.layers.IfElse(cond=cond)\nwith " - "ie.true_block():\n out_1 = ie.input(x)\n\n" - "Please ensure that the cond should be a 2-D tensor " - "and the second dim size of cond should be 1. " - "But now the cond's shape is [", - *mask_dim.Get(), "].\n"); + platform::errors::InvalidArgument( + "If you are using IfElse OP:" + "\n\nie = fluid.layers.IfElse(cond=cond)\nwith " + "ie.true_block():\n out_1 = ie.input(x)\n\n" + "Please ensure that the cond is a 2-D tensor " + "and the second dim size of cond is 1. 
" + "But now the cond's shape is [%s].\n", + mask_dim)); } context->SetOutputDim("Out", context->GetInputDim("InTrue")); diff --git a/paddle/fluid/operators/strided_memcpy.h b/paddle/fluid/operators/strided_memcpy.h index f20bada8ab2..142b00b4de6 100644 --- a/paddle/fluid/operators/strided_memcpy.h +++ b/paddle/fluid/operators/strided_memcpy.h @@ -60,20 +60,33 @@ inline void StridedNumelCopyWithAxis(const platform::DeviceContext& ctx, auto place = ctx.GetPlace(); PADDLE_ENFORCE_EQ(src_stride_numel.size(), dst_stride_numel.size(), - "src and dst tensor should have the same dims size."); + platform::errors::InvalidArgument( + "Source and destination tensor should have the same " + "dimension size, but source tensor dimension size is " + "%u, destination tensor size is %u.", + src_stride_numel.size(), dst_stride_numel.size())); for (int64_t i = 0; i < axis; ++i) { if (i < axis) { - PADDLE_ENFORCE_EQ(src_stride_numel[i] / src_stride_numel[axis], - dst_stride_numel[i] / dst_stride_numel[axis], - "src and dst should have the same elements " - "except the specified axis."); + PADDLE_ENFORCE_EQ( + src_stride_numel[i] / src_stride_numel[axis], + dst_stride_numel[i] / dst_stride_numel[axis], + platform::errors::InvalidArgument( + "Source and destination tensor should have the same number of " + "elements except the specified axis, but the source elements " + "number is %d, destination elements number is %d.", + src_stride_numel[i] / src_stride_numel[axis], + dst_stride_numel[i] / dst_stride_numel[axis])); } else if (i == axis) { continue; } else { - PADDLE_ENFORCE_EQ(src_stride_numel[i], dst_stride_numel[i], - "src and dst should have the same elements " - "except the specified axis."); + PADDLE_ENFORCE_EQ( + src_stride_numel[i], dst_stride_numel[i], + platform::errors::InvalidArgument( + "Source and destination tensor should have the same number of " + "elements except the specified axis, but the source elements " + "number is %d, destination elements number is %d.", + 
src_stride_numel[i], dst_stride_numel[i])); } } @@ -90,7 +103,8 @@ inline void StridedNumelCopyWithAxis(const platform::DeviceContext& ctx, memory::Copy(gpu_place, dst + i * dst_after, gpu_place, src + i * src_after, sizeof(T) * size, cuda_ctx.stream()); #else - PADDLE_THROW("Paddle is not compiled with GPU"); + PADDLE_THROW(platform::errors::PreconditionNotMet( + "Paddle is not compiled with GPU.")); #endif } } diff --git a/paddle/fluid/operators/var_conv_2d_op.cc b/paddle/fluid/operators/var_conv_2d_op.cc index f8a29a52d7a..db8b2c30501 100644 --- a/paddle/fluid/operators/var_conv_2d_op.cc +++ b/paddle/fluid/operators/var_conv_2d_op.cc @@ -78,21 +78,35 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { platform::errors::NotFound("Col(Output) of VarConv2dOP is not found.")); auto x_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE_EQ(x_dims.size(), 2, - "The rank of X(Input) can't be less than 2."); + PADDLE_ENFORCE_EQ( + x_dims.size(), 2, + platform::errors::InvalidArgument( + "The rank of X(Input) can't be less than 2, but received rank is %u.", + x_dims.size())); auto w_dims = ctx->GetInputDim("W"); - PADDLE_ENFORCE_EQ(w_dims.size(), 2, "W should be 2-D tensor"); + PADDLE_ENFORCE_EQ( + w_dims.size(), 2, + platform::errors::InvalidArgument( + "Input W should be a 2-D tensor, but its actual dimension is %u.", + w_dims.size())); int output_channel = ctx->Attrs().Get("OutputChannel"); int input_channel = ctx->Attrs().Get("InputChannel"); int kernel_h = ctx->Attrs().Get("KernelH"); int kernel_w = ctx->Attrs().Get("KernelW"); - PADDLE_ENFORCE_EQ(w_dims[0], output_channel, - "W dim[0] should be equal to OutputChannel"); + PADDLE_ENFORCE_EQ( + w_dims[0], output_channel, + platform::errors::InvalidArgument( + "Input W's dimension[0] should be equal to OutputChannel, the " + "dimension[0] is %d, OutputChannel is %d.", + w_dims[0], output_channel)); PADDLE_ENFORCE_EQ( w_dims[1], input_channel * kernel_h * kernel_w, - "W dim[1] should be equal to 
InputChannel * StrideH * StrideW"); + platform::errors::InvalidArgument( + "Input W's dimension[1] should be equal to InputChannel * StrideH * " + "StrideW, the dimension[1] is %d, expected value is %d.", + w_dims[1], input_channel * kernel_h * kernel_w)); if (ctx->IsRuntime()) { framework::Variable* x_var = @@ -103,10 +117,14 @@ void VarConv2dOP::InferShape(framework::InferShapeContext* ctx) const { platform::errors::InvalidArgument("The Input(X) Tensor of VarConv2dOP " "does not contain LoD information.")); - PADDLE_ENFORCE_GE(x_lod.size(), 1, "The Input(X)'s lod info is corrupted."); - PADDLE_ENFORCE_EQ( - x_dims[0], static_cast(x_lod[0].back()), - "The Input(X)'s lod info mismatches the actual tensor shape."); + PADDLE_ENFORCE_GE(x_lod.size(), 1, + platform::errors::InvalidArgument( + "The Input(X)'s lod info is corrupted.")); + PADDLE_ENFORCE_EQ(x_dims[0], static_cast(x_lod[0].back()), + platform::errors::InvalidArgument( + "The Input(X)'s lod info mismatches the actual " + "tensor shape, input lod is %s, tensor shape is %s.", + x_lod, x_dims)); framework::Variable* row_var = BOOST_GET(framework::Variable*, ctx->GetInputVarPtrs("ROW")[0]); -- GitLab From 292b24aa6de374619e4ada5e5f17b602b1def0f8 Mon Sep 17 00:00:00 2001 From: Zhou Wei <52485244+zhouwei25@users.noreply.github.com> Date: Wed, 23 Sep 2020 11:06:51 +0800 Subject: [PATCH 049/117] fix bug MD of compile, And add MD/STATIC/OPENBLAS inference lib check on windows (#27051) --- CMakeLists.txt | 23 ++++- cmake/external/cryptopp.cmake | 17 +--- cmake/flags.cmake | 31 ------- cmake/inference_lib.cmake | 27 +++--- paddle/fluid/inference/CMakeLists.txt | 5 +- .../inference/api/demo_ci/CMakeLists.txt | 4 +- paddle/fluid/inference/api/demo_ci/run.sh | 91 ++++++++++--------- .../inference/api/paddle_infer_declare.h | 4 - paddle/scripts/paddle_build.bat | 19 +++- 9 files changed, 105 insertions(+), 116 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fb796103350..b1554fba5e1 100755 --- 
a/CMakeLists.txt +++ b/CMakeLists.txt @@ -63,8 +63,29 @@ if(WIN32) set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT") + foreach(flag_var + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO + CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) + if(${flag_var} MATCHES "/MD") + string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + endif() + endforeach(flag_var) endif() - + + # windows build turn off warnings. + foreach(flag_var + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO + CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE + CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) + string(REGEX REPLACE "/W[1-4]" " /W0 " ${flag_var} "${${flag_var}}") + endforeach(flag_var) + foreach(flag_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS) + set(${flag_var} "${${flag_var}} /w") + endforeach(flag_var) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838 /MP") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838 /MP") message(STATUS "Using parallel compiling (/MP)") diff --git a/cmake/external/cryptopp.cmake b/cmake/external/cryptopp.cmake index af5dd0e2c9b..351ef1c7c7a 100644 --- a/cmake/external/cryptopp.cmake +++ b/cmake/external/cryptopp.cmake @@ -22,23 +22,8 @@ SET(CRYPTOPP_TAG CRYPTOPP_8_2_0) IF(WIN32) SET(CRYPTOPP_LIBRARIES "${CRYPTOPP_INSTALL_DIR}/lib/cryptopp-static.lib" CACHE FILEPATH "cryptopp library." 
FORCE) - SET(CRYPTOPP_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd") - set(CompilerFlags - CMAKE_CXX_FLAGS - CMAKE_CXX_FLAGS_DEBUG - CMAKE_CXX_FLAGS_RELEASE - CMAKE_C_FLAGS - CMAKE_C_FLAGS_DEBUG - CMAKE_C_FLAGS_RELEASE - ) - foreach(CompilerFlag ${CompilerFlags}) - string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}") - endforeach() ELSE(WIN32) SET(CRYPTOPP_LIBRARIES "${CRYPTOPP_INSTALL_DIR}/lib/libcryptopp.a" CACHE FILEPATH "cryptopp library." FORCE) - SET(CRYPTOPP_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) ENDIF(WIN32) set(CRYPTOPP_CMAKE_ARGS ${COMMON_CMAKE_ARGS} @@ -48,7 +33,7 @@ set(CRYPTOPP_CMAKE_ARGS ${COMMON_CMAKE_ARGS} -DCMAKE_INSTALL_LIBDIR=${CRYPTOPP_INSTALL_DIR}/lib -DCMAKE_INSTALL_PREFIX=${CRYPTOPP_INSTALL_DIR} -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - -DCMAKE_CXX_FLAGS=${CRYPTOPP_CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} diff --git a/cmake/flags.cmake b/cmake/flags.cmake index 415e07c7542..ed0bf8396b3 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -90,20 +90,6 @@ macro(safe_set_nvflag flag_name) endif() endmacro() -macro(safe_set_static_flag) # set c_flags and cxx_flags to static or shared - if (BUILD_SHARED_LIBS) - return() # if build shared libs, the flags keep same with '/MD' - endif(BUILD_SHARED_LIBS) - foreach(flag_var - CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE - CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO - CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE - CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) - if(${flag_var} MATCHES "/MD") - string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") - endif(${flag_var} MATCHES "/MD") - endforeach(flag_var) -endmacro() CHECK_CXX_SYMBOL_EXISTS(UINT64_MAX "stdint.h" 
UINT64_MAX_EXISTS) if(NOT UINT64_MAX_EXISTS) @@ -229,20 +215,3 @@ endforeach() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${SAFE_GPU_COMMON_FLAGS}") - -if(WIN32) - # windows build turn off warnings. - if(MSVC_STATIC_CRT) - safe_set_static_flag() - endif() - foreach(flag_var - CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE - CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO - CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE - CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) - string(REGEX REPLACE "/W[1-4]" " /W0 " ${flag_var} "${${flag_var}}") - endforeach(flag_var) - foreach(flag_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS) - set(${flag_var} "${${flag_var}} /w") - endforeach(flag_var) -endif() diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index e3c2409f103..f19f0eb43d3 100644 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -24,7 +24,7 @@ set(PADDLE_INFERENCE_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_install_d # so the generation of static lib is temporarily turned off. if(WIN32) #todo: remove the option - option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." OFF) + option(WITH_STATIC_LIB "Compile demo with static/shared library, default use dynamic." 
OFF) if(NOT PYTHON_EXECUTABLE) FIND_PACKAGE(PythonInterp REQUIRED) endif() @@ -165,25 +165,22 @@ copy_part_of_thrid_party(inference_lib_dist ${PADDLE_INFERENCE_INSTALL_DIR}) set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid") if(WIN32) if(WITH_STATIC_LIB) - set(paddle_fluid_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/libpaddle_fluid.lib) + set(paddle_fluid_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/libpaddle_fluid.lib + ${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/paddle_fluid.*) else() set(paddle_fluid_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/paddle_fluid.dll - ${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/paddle_fluid.lib) + ${PADDLE_BINARY_DIR}/paddle/fluid/inference/${CMAKE_BUILD_TYPE}/paddle_fluid.lib) endif() + copy(inference_lib_dist + SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_fluid_lib} + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib + ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib) else(WIN32) set(paddle_fluid_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.*) -endif(WIN32) - -if(WIN32 AND NOT WITH_STATIC_LIB) - copy(inference_lib_dist - SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_fluid_lib} - DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib - ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib) -else() - copy(inference_lib_dist + copy(inference_lib_dist SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_fluid_lib} DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib) -endif() +endif(WIN32) copy(inference_lib_dist SRCS ${CMAKE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h @@ -211,12 +208,12 @@ add_custom_target(fluid_lib_dist ALL DEPENDS ${fluid_lib_deps}) set(dst_dir "${PADDLE_INSTALL_DIR}/paddle/fluid") set(module "inference") -if(WIN32 AND NOT WITH_STATIC_LIB) +if(WIN32) copy(fluid_lib_dist 
SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/api/paddle_*.h ${paddle_fluid_lib} DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ) -else() + else() copy(fluid_lib_dist SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/api/paddle_*.h ${paddle_fluid_lib} DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index 9dc96fdfe86..cf6fcb7b643 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -44,10 +44,11 @@ add_subdirectory(api) set(STATIC_INFERENCE_API paddle_inference_api analysis_predictor zero_copy_tensor reset_tensor_array analysis_config paddle_pass_builder activation_functions ${mkldnn_quantizer_cfg}) -if(WIN32) +# TODO(xingzhaolong, jiweibo): remove this and create_static_lib(paddle_fluid) on windows GPU +if(WIN32 AND WITH_GPU) cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_API}) else() - create_static_lib(paddle_fluid ${fluid_modules} ${STATIC_INFERENCE_API}) + create_static_lib(paddle_fluid ${fluid_modules} ${STATIC_INFERENCE_API}) endif() if(NOT APPLE AND NOT WIN32) diff --git a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt index 08a1a542819..6a3760e1f74 100644 --- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt +++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt @@ -51,8 +51,8 @@ if (WIN32) set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT") + safe_set_static_flag() if (WITH_STATIC_LIB) - safe_set_static_flag() add_definitions(-DSTATIC_LIB) endif() endif() @@ -136,7 +136,7 @@ else() set(DEPS ${DEPS} ${MATH_LIB} ${MKLDNN_LIB} glog gflags_static libprotobuf xxhash ${EXTERNAL_LIB}) - set(DEPS ${DEPS} libcmt shlwapi.lib) + set(DEPS 
${DEPS} shlwapi.lib) endif(NOT WIN32) if(WITH_GPU) diff --git a/paddle/fluid/inference/api/demo_ci/run.sh b/paddle/fluid/inference/api/demo_ci/run.sh index 6b7fb0f619a..a3e7bec398a 100755 --- a/paddle/fluid/inference/api/demo_ci/run.sh +++ b/paddle/fluid/inference/api/demo_ci/run.sh @@ -6,7 +6,7 @@ TEST_GPU_CPU=$3 # test both GPU/CPU mode or only CPU mode DATA_DIR=$4 # dataset TENSORRT_INCLUDE_DIR=$5 # TensorRT header file dir, default to /usr/local/TensorRT/include TENSORRT_LIB_DIR=$6 # TensorRT lib file dir, default to /usr/local/TensorRT/lib - +MSVC_STATIC_CRT=$7 inference_install_dir=${PADDLE_ROOT}/build/paddle_inference_install_dir cd `dirname $0` @@ -66,43 +66,54 @@ mkdir -p build cd build rm -rf * -if [ $(echo `uname` | grep "Win") != "" ]; then - # -----simple_on_word2vec on windows----- - cmake .. -G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB=${inference_install_dir} \ - -DWITH_MKL=$TURN_ON_MKL \ - -DDEMO_NAME=simple_on_word2vec \ - -DWITH_GPU=$TEST_GPU_CPU \ - -DWITH_STATIC_LIB=OFF - msbuild /maxcpucount /property:Configuration=Release cpp_inference_demo.sln - Release/simple_on_word2vec.exe \ - --dirname=$DATA_DIR/word2vec/word2vec.inference.model \ - --use_gpu=False - if [ $? -ne 0 ]; then - echo "simple_on_word2vec demo runs fail." - exit 1 - fi - - # -----vis_demo on windows----- - rm -rf * - cmake .. -G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB=${inference_install_dir} \ - -DWITH_MKL=$TURN_ON_MKL \ - -DDEMO_NAME=vis_demo \ - -DWITH_GPU=$TEST_GPU_CPU \ - -DWITH_STATIC_LIB=OFF - msbuild /maxcpucount /property:Configuration=Release cpp_inference_demo.sln - for vis_demo_name in $vis_demo_list; do - Release/vis_demo.exe \ - --modeldir=$DATA_DIR/$vis_demo_name/model \ - --data=$DATA_DIR/$vis_demo_name/data.txt \ - --refer=$DATA_DIR/$vis_demo_name/result.txt \ - --use_gpu=False - if [ $? -ne 0 ]; then - echo "vis demo $vis_demo_name runs fail." 
- exit 1 +for WITH_STATIC_LIB in ON OFF; do + if [ $(echo `uname` | grep "Win") != "" ]; then + # TODO(xingzhaolong, jiweibo): remove this if windows GPU library is ready. + if [ $TEST_GPU_CPU == ON ] && [ $WITH_STATIC_LIB == ON ]; then + continue fi - done -else - for WITH_STATIC_LIB in ON OFF; do + + # -----simple_on_word2vec on windows----- + cmake .. -G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB=${inference_install_dir} \ + -DWITH_MKL=$TURN_ON_MKL \ + -DDEMO_NAME=simple_on_word2vec \ + -DWITH_GPU=$TEST_GPU_CPU \ + -DWITH_STATIC_LIB=$WITH_STATIC_LIB \ + -DMSVC_STATIC_CRT=$MSVC_STATIC_CRT + msbuild /maxcpucount /property:Configuration=Release cpp_inference_demo.sln + for use_gpu in $use_gpu_list; do + Release/simple_on_word2vec.exe \ + --dirname=$DATA_DIR/word2vec/word2vec.inference.model \ + --use_gpu=$use_gpu + if [ $? -ne 0 ]; then + echo "simple_on_word2vec demo runs fail." + exit 1 + fi + done + + # -----vis_demo on windows----- + rm -rf * + cmake .. -G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB=${inference_install_dir} \ + -DWITH_MKL=$TURN_ON_MKL \ + -DDEMO_NAME=vis_demo \ + -DWITH_GPU=$TEST_GPU_CPU \ + -DWITH_STATIC_LIB=$WITH_STATIC_LIB \ + -DMSVC_STATIC_CRT=$MSVC_STATIC_CRT + msbuild /maxcpucount /property:Configuration=Release cpp_inference_demo.sln + for use_gpu in $use_gpu_list; do + for vis_demo_name in $vis_demo_list; do + Release/vis_demo.exe \ + --modeldir=$DATA_DIR/$vis_demo_name/model \ + --data=$DATA_DIR/$vis_demo_name/data.txt \ + --refer=$DATA_DIR/$vis_demo_name/result.txt \ + --use_gpu=$use_gpu + if [ $? -ne 0 ]; then + echo "vis demo $vis_demo_name runs fail." + exit 1 + fi + done + done + else # -----simple_on_word2vec on linux/mac----- rm -rf * cmake .. -DPADDLE_LIB=${inference_install_dir} \ @@ -123,7 +134,6 @@ else fi done fi - # ---------vis_demo on linux/mac--------- rm -rf * cmake ..
-DPADDLE_LIB=${inference_install_dir} \ @@ -145,7 +155,6 @@ else fi done done - # --------tensorrt mobilenet on linux/mac------ if [ $USE_TENSORRT == ON -a $TEST_GPU_CPU == ON ]; then rm -rf * @@ -167,6 +176,6 @@ else exit 1 fi fi - done -fi + fi +done set +x diff --git a/paddle/fluid/inference/api/paddle_infer_declare.h b/paddle/fluid/inference/api/paddle_infer_declare.h index 39c9653f16c..e8525f440fe 100644 --- a/paddle/fluid/inference/api/paddle_infer_declare.h +++ b/paddle/fluid/inference/api/paddle_infer_declare.h @@ -17,11 +17,7 @@ #if defined(_WIN32) #ifndef PD_INFER_DECL #ifdef PADDLE_DLL_INFERENCE -#ifndef PADDLE_ON_INFERENCE -#define PD_INFER_DECL -#else #define PD_INFER_DECL __declspec(dllexport) -#endif // PADDLE_ON_INFERENCE #else #define PD_INFER_DECL __declspec(dllimport) #endif // PADDLE_DLL_INFERENCE diff --git a/paddle/scripts/paddle_build.bat b/paddle/scripts/paddle_build.bat index 60e4496bc54..524c086c079 100644 --- a/paddle/scripts/paddle_build.bat +++ b/paddle/scripts/paddle_build.bat @@ -26,6 +26,7 @@ wmic process where name="op_function_generator.exe" call terminate 2>NUL rem ------initialize common variable------ if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR="C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v10.0" if not defined BRANCH set BRANCH=develop +if not defined TENSORRT_ROOT set TENSORRT_ROOT="C:/TensorRT-5.1.5.0" if not defined WITH_MKL set WITH_MKL=ON if not defined WITH_GPU set WITH_GPU=OFF if not defined WITH_AVX set WITH_AVX=ON @@ -33,9 +34,11 @@ if not defined WITH_TESTING set WITH_TESTING=ON if not defined WITH_PYTHON set WITH_PYTHON=ON if not defined ON_INFER set ON_INFER=ON if not defined WITH_INFERENCE_API_TEST set WITH_INFERENCE_API_TEST=ON +if not defined WITH_STATIC_LIB set WITH_STATIC_LIB=ON if not defined WITH_CACHE set WITH_CACHE=ON if not defined WITH_TPCACHE set WITH_TPCACHE=ON + rem -------set cache build work directory----------- if "%WITH_CACHE%"=="OFF" ( rmdir build /s/q @@ -99,6 +102,7 @@ 
set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000 :: set maximum cache size to 20G clcache.exe -M 21474836480 + rem ------set cache third_party------ set cache_dir=%work_dir:Paddle=cache% dir %cache_dir% @@ -138,6 +142,7 @@ exit /b 1 :CASE_wincheck_mkl set WITH_MKL=ON set WITH_GPU=OFF +set MSVC_STATIC_CRT=ON call :cmake || goto cmake_error call :build || goto build_error call :test_whl_pacakage || goto test_whl_pacakage_error @@ -149,11 +154,13 @@ goto:success :CASE_wincheck_openblas set WITH_MKL=OFF set WITH_GPU=ON +set MSVC_STATIC_CRT=OFF rem Temporarily turn off WITH_INFERENCE_API_TEST on GPU due to compile hang set WITH_INFERENCE_API_TEST=OFF call :cmake || goto cmake_error call :build || goto build_error call :test_whl_pacakage || goto test_whl_pacakage_error +:: call :test_inference || goto test_inference_error goto:success rem "Other configurations are added here" @@ -172,12 +179,14 @@ set start=%start:~4,10% echo cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_AVX=%WITH_AVX% -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% ^ -DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DCUDA_TOOLKIT_ROOT_DIR=%CUDA_TOOLKIT_ROOT_DIR% ^ -DON_INFER=%ON_INFER% -DWITH_INFERENCE_API_TEST=%WITH_INFERENCE_API_TEST% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% ^ --DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% +-DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% -DWITH_STATIC_LIB=%WITH_STATIC_LIB% ^ +-DTENSORRT_ROOT=%TENSORRT_ROOT% -DMSVC_STATIC_CRT=%MSVC_STATIC_CRT% cmake .. 
-G "Visual Studio 14 2015 Win64" -DWITH_AVX=%WITH_AVX% -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% ^ -DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DCUDA_TOOLKIT_ROOT_DIR=%CUDA_TOOLKIT_ROOT_DIR% ^ -DON_INFER=%ON_INFER% -DWITH_INFERENCE_API_TEST=%WITH_INFERENCE_API_TEST% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% ^ --DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% +-DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% -DWITH_STATIC_LIB=%WITH_STATIC_LIB% ^ +-DTENSORRT_ROOT=%TENSORRT_ROOT% -DMSVC_STATIC_CRT=%MSVC_STATIC_CRT% goto:eof :cmake_error @@ -282,7 +291,9 @@ dir %THIRD_PARTY_PATH:/=\%\install\mklml\lib dir %THIRD_PARTY_PATH:/=\%\install\mkldnn\bin dir %THIRD_PARTY_PATH:/=\%\install\warpctc\bin -set PATH=%THIRD_PARTY_PATH:/=\%\install\openblas\lib;%THIRD_PARTY_PATH:/=\%\install\openblas\bin;%THIRD_PARTY_PATH:/=\%\install\zlib\bin;%THIRD_PARTY_PATH:/=\%\install\mklml\lib;%THIRD_PARTY_PATH:/=\%\install\mkldnn\bin;%THIRD_PARTY_PATH:/=\%\install\warpctc\bin;%PATH% +set PATH=%THIRD_PARTY_PATH:/=\%\install\openblas\lib;%THIRD_PARTY_PATH:/=\%\install\openblas\bin;^ +%THIRD_PARTY_PATH:/=\%\install\zlib\bin;%THIRD_PARTY_PATH:/=\%\install\mklml\lib;^ +%THIRD_PARTY_PATH:/=\%\install\mkldnn\bin;%THIRD_PARTY_PATH:/=\%\install\warpctc\bin;%PATH% ctest.exe --output-on-failure -C Release -j 8 --repeat until-pass:4 after-timeout:4 goto:eof @@ -305,7 +316,7 @@ set end=%end:~4,10% call :timestamp "%start%" "%end%" "TestCases Total" cd %work_dir%\paddle\fluid\inference\api\demo_ci -%cache_dir%\tools\busybox64.exe bash run.sh %work_dir:\=/% %WITH_MKL% %WITH_GPU% %cache_dir:\=/%/inference_demo +%cache_dir%\tools\busybox64.exe bash run.sh %work_dir:\=/% %WITH_MKL% %WITH_GPU% %cache_dir:\=/%/inference_demo %TENSORRT_ROOT%/include %TENSORRT_ROOT%/lib %MSVC_STATIC_CRT% goto:eof :test_inference_error -- GitLab From d7b7dcd10e6cdb00f237c6a9ef8f1d562733043b Mon Sep 17 00:00:00 2001 From: Qi Li Date: Wed, 23 Sep 2020 12:48:42 +0800 Subject: [PATCH 050/117] fix cmake 
dependencies of test_recognize_digits, test=develop (#27475) --- paddle/fluid/train/CMakeLists.txt | 2 +- python/paddle/fluid/tests/book/CMakeLists.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/train/CMakeLists.txt b/paddle/fluid/train/CMakeLists.txt index 235d92ac4f9..d587081fbac 100644 --- a/paddle/fluid/train/CMakeLists.txt +++ b/paddle/fluid/train/CMakeLists.txt @@ -26,7 +26,7 @@ function(train_test TARGET_NAME) ARGS --dirname=${PYTHON_TESTS_DIR}/book/${TARGET_NAME}${arg}.train.model/) endif() set_tests_properties(test_train_${TARGET_NAME}${arg} - PROPERTIES DEPENDS test_${TARGET_NAME}) + PROPERTIES FIXTURES_REQUIRED test_${TARGET_NAME}_infer_model) if(NOT WIN32 AND NOT APPLE) set_tests_properties(test_train_${TARGET_NAME}${arg} PROPERTIES TIMEOUT 150) diff --git a/python/paddle/fluid/tests/book/CMakeLists.txt b/python/paddle/fluid/tests/book/CMakeLists.txt index 673c965b662..96321aae566 100644 --- a/python/paddle/fluid/tests/book/CMakeLists.txt +++ b/python/paddle/fluid/tests/book/CMakeLists.txt @@ -4,4 +4,5 @@ string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") # default test foreach(src ${TEST_OPS}) py_test(${src} SRCS ${src}.py) + set_tests_properties(${src} PROPERTIES FIXTURES_SETUP ${src}_infer_model) endforeach() -- GitLab From bc5f0246a807728593c889d7924c921e88ffe643 Mon Sep 17 00:00:00 2001 From: tangwei12 Date: Wed, 23 Sep 2020 14:26:37 +0800 Subject: [PATCH 051/117] large scale kv speedup (#26510) * rename communicator meet->BatchesCounter * fix parame recv for sparse * geo sparse init from pserver * optimize init from pserver * add large scale optimizer fuse(SGD/ADAM) * rectification init_worker and exe.run startup program --- .../operators/distributed/communicator.cc | 94 ++++++---- .../operators/distributed/communicator.h | 10 +- .../operators/distributed/parameter_recv.cc | 86 ++++++++- .../lookup_sparse_table_fuse_adam_op.cc | 153 ++++++++++++++++ .../lookup_sparse_table_fuse_adam_op.h | 142 +++++++++++++++ 
.../lookup_sparse_table_fuse_sgd_op.cc | 120 ++++++++++++ .../lookup_sparse_table_fuse_sgd_op.h | 105 +++++++++++ .../operators/distributed_ops/recv_op.cc | 11 +- .../fleet/runtime/parameter_server_runtime.py | 6 +- .../distribute_transpiler/__init__.py | 4 +- .../fleet/parameter_server/ir/pserver_pass.py | 87 ++++++++- .../fleet/parameter_server/ir/trainer_pass.py | 16 -- .../incubate/fleet/tests/fleet_deep_ctr.py | 2 +- .../fluid/tests/unittests/dist_fleet_ctr.py | 6 +- .../tests/unittests/dist_fleet_ctr_ps_gpu.py | 5 +- .../tests/unittests/dist_fleet_heter_ctr.py | 5 +- .../tests/unittests/dist_fleet_simnet_bow.py | 2 +- .../dist_fleet_sparse_embedding_ctr.py | 3 +- .../unittests/test_communicator_async.py | 5 +- .../tests/unittests/test_communicator_geo.py | 2 +- .../unittests/test_communicator_half_async.py | 2 +- .../tests/unittests/test_communicator_sync.py | 6 +- .../test_dist_fleet_a_sync_optimizer_async.py | 30 +-- .../test_dist_fleet_a_sync_optimizer_sync.py | 15 +- .../tests/unittests/test_dist_fleet_ps4.py | 20 +- .../tests/unittests/test_dist_fleet_ps5.py | 3 +- .../tests/unittests/test_dist_fleet_ps6.py | 168 +++++++++++++++++ .../test_dist_lookup_sparse_table_fuse_ops.py | 171 ++++++++++++++++++ 28 files changed, 1137 insertions(+), 142 deletions(-) create mode 100644 paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.cc create mode 100644 paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.h create mode 100644 paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.cc create mode 100644 paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.h create mode 100644 python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py create mode 100644 python/paddle/fluid/tests/unittests/test_dist_lookup_sparse_table_fuse_ops.py diff --git a/paddle/fluid/operators/distributed/communicator.cc b/paddle/fluid/operators/distributed/communicator.cc index b2cc9390fa2..a0ac82a6f4a 100644 --- 
a/paddle/fluid/operators/distributed/communicator.cc +++ b/paddle/fluid/operators/distributed/communicator.cc @@ -74,8 +74,12 @@ void AsyncCommunicator::InitImpl(const RpcCtxMap &send_varname_to_ctx, } else { recv_threadpool_.reset(new ::ThreadPool(thread_pool_size_)); } + + InitParams(); } +void AsyncCommunicator::InitParams() { RecvNoBarrier(); } + AsyncCommunicator::~AsyncCommunicator() { running_ = false; if (main_thread_) main_thread_->join(); @@ -157,16 +161,18 @@ void AsyncCommunicator::MainThread() { } while (running_) { - int meet = Meet(); - - VLOG(1) << "async_meet: " << meet; - - SendGlobalStep(meet); - SendByCommunicator(meet); - BarrierSend(); - RecvByCommunicator(); - BarrierRecv(); - BarrierWeakUp(); + int batches = BatchesCounter(); + + if (batches > 0) { + SendGlobalStep(batches); + SendByCommunicator(batches); + BarrierSend(); + RecvByCommunicator(); + BarrierRecv(); + BarrierWeakUp(); + } else { + VLOG(1) << "get nothing from sending queue, will skip send/recv"; + } } VLOG(1) << "communicator stopped, send thread exit"; } @@ -187,7 +193,7 @@ void AsyncCommunicator::RecvNoBarrier() { auto &var_name = iter.first; VLOG(4) << "recv var " << var_name; auto recv_functor = distributed::ParameterRecv(); - recv_functor(iter.second, *recv_scope_, false); + recv_functor(iter.second, *recv_scope_); }; task_futures.emplace_back(recv_threadpool_->enqueue(std::move(recv_task))); } @@ -197,7 +203,7 @@ void AsyncCommunicator::RecvNoBarrier() { } } -int AsyncCommunicator::Meet() { +int AsyncCommunicator::BatchesCounter() { auto &step_queue = send_varname_to_queue_.at(STEP_COUNTER); size_t merged_var_num = 0; @@ -316,7 +322,7 @@ void HalfAsyncCommunicator::Clean() { } } -int HalfAsyncCommunicator::Meet() { +int HalfAsyncCommunicator::BatchesCounter() { while (running_) { if (barrier_counter_.load() >= barrier_trigger_.load() && barrier_trigger_.load() != 0) { @@ -443,7 +449,7 @@ void GeoCommunicator::InitImpl(const RpcCtxMap &send_varname_to_ctx, 
old_scope_.reset(new Scope()); pserver_scope_.reset(new Scope()); - Init(); + InitParams(); } void GeoCommunicator::Send(const std::vector &var_names, @@ -626,9 +632,7 @@ void GeoCommunicator::RecvByCommunicator() { if (recv_ctx.is_sparse) { RecvSparse(var_name); } else { - VLOG(1) << "recv dense " << var_name << " begin"; RecvDense(var_name); - VLOG(1) << "recv dense " << var_name << " done"; } }; tasks.emplace_back(send_threadpool_->enqueue(std::move(recv_task))); @@ -696,7 +700,7 @@ void GeoCommunicator::RecvDense(const std::string &varname) { auto &ctx = recv_varname_to_ctx_.at(varname); auto recv = distributed::ParameterRecv(); - recv(ctx, *pserver_scope_, true); + recv(ctx, *pserver_scope_); PADDLE_ENFORCE_EQ( var_psrever->IsInitialized(), true, @@ -721,7 +725,7 @@ void GeoCommunicator::RecvDense(const std::string &varname) { t_timestamp->data()); } -void GeoCommunicator::Init() { +void GeoCommunicator::InitParams() { std::vector> tasks; tasks.reserve(recv_varname_to_ctx_.size()); @@ -744,12 +748,17 @@ void GeoCommunicator::Init() { } void GeoCommunicator::InitDense(const std::string varname) { - auto *var = old_scope_->Var(varname); - var->GetMutable(); - auto &ctx = recv_varname_to_ctx_.at(varname); auto recv = distributed::ParameterRecv(); - recv(ctx, *old_scope_); + recv(ctx, *recv_scope_); + + auto *global_var = recv_scope_->FindVar(varname); + global_var->GetMutable(); + + auto *old_var = old_scope_->Var(varname); + old_var->GetMutable(); + + framework::CopyVariable(*global_var, old_var); VLOG(1) << "init dense variable " << varname << " done"; } @@ -781,22 +790,41 @@ void GeoCommunicator::InitSparse() { LargeScaleKV::Init(metas); - for (size_t i = 0; i < metas.size(); i++) { - auto &varname = metas[i].name; - auto &dict = dicts[i]; + for (auto &meta : metas) { + auto &ctx = recv_varname_to_ctx_.at(meta.name); + auto recv = distributed::ParameterRecv(); - std::vector ids; - ids.reserve(dict); + auto *global_var = recv_scope_->FindVar(meta.name); + auto 
global_value = global_var->Get(); + auto rows = global_value.dims()[0]; + auto dim1 = global_value.dims()[1]; - for (auto j = 0; j < dict; ++j) { - ids.push_back(j); - } + recv(ctx, *recv_scope_); + VLOG(1) << "recv " << meta.name << " with global scope for init"; + + auto n_rows = global_var->Get().dims()[0]; + + PADDLE_ENFORCE_EQ( + rows, n_rows, + platform::errors::InvalidArgument( + "global var: %s origin dim must equal recved rows", meta.name)); + + std::vector ids(rows); + std::iota(ids.begin(), ids.end(), 0); auto *ins = distributed::LargeScaleKV::GetInstance(); - ins->Get(varname)->Init(ids); + std::vector *>> values; + + ins->Get(meta.name)->Init(ids); + ins->Get(meta.name)->Get(ids, {"Param"}, &values); - VLOG(3) << "GeoCommunicator init sparse " << varname << " with size " - << ids.size(); + auto blas = math::GetBlas( + paddle::platform::CPUDeviceContext()); + + for (auto &id : ids) { + blas.VCOPY(dim1, global_value.data() + id * dim1, + values[id][0]->data()); + } } VLOG(3) << "init sparse variable done"; diff --git a/paddle/fluid/operators/distributed/communicator.h b/paddle/fluid/operators/distributed/communicator.h index 2f6da150d1e..4a9a9eb1701 100644 --- a/paddle/fluid/operators/distributed/communicator.h +++ b/paddle/fluid/operators/distributed/communicator.h @@ -19,6 +19,7 @@ limitations under the License. */ #include #include #include +#include #include #include #include @@ -29,6 +30,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/variable.h" +#include "paddle/fluid/framework/variable_helper.h" #include "paddle/fluid/operators/distributed/communicator_common.h" #include "paddle/fluid/operators/distributed/distributed.h" #include "paddle/fluid/operators/distributed/large_scale_kv.h" @@ -279,6 +281,8 @@ class AsyncCommunicator : public Communicator { const RpcCtxMap &recv_varname_to_ctx, Scope *recv_scope) override; + void InitParams(); + void MainThread(); void Send(const std::vector &var_names, @@ -293,7 +297,7 @@ class AsyncCommunicator : public Communicator { virtual void RecvNoBarrier(); - virtual int Meet(); + virtual int BatchesCounter(); virtual void BarrierSend() {} @@ -350,7 +354,7 @@ class HalfAsyncCommunicator : public AsyncCommunicator { void BarrierTriggerReset(int initial_val) override; - int Meet(); + int BatchesCounter(); void BarrierWeakUp(); @@ -435,7 +439,7 @@ class GeoCommunicator : public AsyncCommunicator { void RecvDense(const std::string &varname); - void Init(); + void InitParams(); void InitSparse(); diff --git a/paddle/fluid/operators/distributed/parameter_recv.cc b/paddle/fluid/operators/distributed/parameter_recv.cc index 5409ec54987..3b8479c91b0 100644 --- a/paddle/fluid/operators/distributed/parameter_recv.cc +++ b/paddle/fluid/operators/distributed/parameter_recv.cc @@ -41,8 +41,67 @@ using SelectedRows = framework::SelectedRows; using DDim = framework::DDim; template -void RecvSelectedRows(const CommContext &rpc_ctx, - const framework::Scope &scope) { +void RecvSparseLodTensor(const CommContext &rpc_ctx, + const framework::Scope &scope) { + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto cpu_place = platform::CPUPlace(); + auto &cpu_ctx = *pool.Get(cpu_place); + + distributed::RPCClient *rpc_client = + distributed::RPCClient::GetInstance(rpc_ctx.trainer_id); + + std::unique_ptr local_scope = scope.NewTmpScope(); + std::vector tensors; + std::vector 
rets; + for (size_t i = 0; i < rpc_ctx.splited_varnames.size(); i++) { + auto &recv_var_name = rpc_ctx.splited_varnames[i]; + auto *local_var = local_scope->Var(recv_var_name); + VLOG(4) << "recv " << recv_var_name << " from " << rpc_ctx.epmap[i]; + // sparse param in recv_scope is LoDTensor + rets.push_back(rpc_client->AsyncGetVarNoBarrier( + rpc_ctx.epmap[i], cpu_ctx, *local_scope.get(), recv_var_name, + recv_var_name)); + + const auto *value = local_var->Get().data(); + tensors.push_back(value); + } + + for (size_t i = 0; i < rets.size(); i++) { + PADDLE_ENFORCE_NE(rets[i]->Wait(), 0U, platform::errors::ExecutionTimeout( + "internal error in RPCClient")); + } + + auto *merged_var = scope.FindVar(rpc_ctx.var_name); + + if (merged_var == nullptr || !merged_var->IsInitialized()) { + PADDLE_THROW(platform::errors::InvalidArgument( + "%s must initialized at first.", rpc_ctx.var_name)); + } + auto dims1 = merged_var->Get().dims()[1]; + int64_t height = 0; + for (size_t i = 0; i < rpc_ctx.splited_varnames.size(); i++) { + auto *splited_var = local_scope->FindVar(rpc_ctx.splited_varnames[i]); + height += splited_var->Get().dims()[0]; + } + + PADDLE_ENFORCE_EQ(merged_var->Get().dims()[0], height, + "recved var must has same dims with local var"); + + auto *merged_t = merged_var->GetMutable(); + auto *merged_d = merged_t->mutable_data(cpu_place); + + auto pserver_num = rpc_ctx.splited_varnames.size(); + for (int x = 0; x < height; ++x) { + auto id = x % pserver_num; + auto idx = x / pserver_num; + std::memcpy(merged_d + x * dims1, tensors[id] + idx * dims1, + sizeof(float) * dims1); + } +} + +template +void RecvGeoSparseRecords(const CommContext &rpc_ctx, + const framework::Scope &scope) { platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto cpu_place = platform::CPUPlace(); auto &cpu_ctx = *pool.Get(cpu_place); @@ -84,9 +143,14 @@ void RecvSelectedRows(const CommContext &rpc_ctx, ids_num += recv_t.rows().size(); width = recv_t.value().dims()[1]; -
std::transform(recv_t.rows().begin(), recv_t.rows().end(), - std::back_inserter(all_ids), - [&](int64_t id) { return id * pserver_num + i; }); + if (rpc_ctx.is_distributed) { + std::copy(recv_t.rows().begin(), recv_t.rows().end(), + std::back_inserter(all_ids)); + } else { + std::transform(recv_t.rows().begin(), recv_t.rows().end(), + std::back_inserter(all_ids), + [&](int64_t id) { return id * pserver_num + i; }); + } } auto *var = scope.FindVar(rpc_ctx.var_name); @@ -146,7 +210,8 @@ void RecvLodTensor(const CommContext &rpc_ctx, const framework::Scope &scope) { template void ParameterRecv::operator()(const CommContext &rpc_ctx, - const framework::Scope &scope, bool barrier) { + const framework::Scope &scope, + bool geo_records) { VLOG(3) << "ParameterRecv in " << rpc_ctx.var_name; PADDLE_ENFORCE_GE(rpc_ctx.origin_varnames.size(), 1, @@ -154,18 +219,21 @@ void ParameterRecv::operator()(const CommContext &rpc_ctx, "origin_varnames.size() >= 1 is permitted")); if (rpc_ctx.is_sparse) { - RecvSelectedRows(rpc_ctx, scope); + if (geo_records) { + RecvGeoSparseRecords(rpc_ctx, scope); + } else { + RecvSparseLodTensor(rpc_ctx, scope); + } } else { RecvLodTensor(rpc_ctx, scope); } VLOG(3) << "ParameterRecv out " << rpc_ctx.var_name; } - template void ParameterRecv::operator()(const CommContext &rpc_ctx, const framework::Scope &scope) { - this->operator()(rpc_ctx, scope, true); + this->operator()(rpc_ctx, scope, false); } template struct ParameterRecv; diff --git a/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.cc b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.cc new file mode 100644 index 00000000000..e53ce8cc67c --- /dev/null +++ b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.cc @@ -0,0 +1,153 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.h" + +#include +namespace paddle { +namespace operators { + +class LargeScaleFuseAdamOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Grad"), + "Input(Grad) of LargeScaleFuseAdamOp should not be null."); + PADDLE_ENFORCE( + ctx->HasInput("LearningRate"), + "Input(LearningRate) of LargeScaleFuseAdamOp should not be null."); + + auto lr_dims = ctx->GetInputDim("LearningRate"); + + PADDLE_ENFORCE_NE(framework::product(lr_dims), 0, + "Maybe the Input variable LearningRate has not " + "been initialized. 
You may need to confirm " + "if you put exe.run(startup_program) " + "after optimizer.minimize function."); + + PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, + "Learning rate should have 1 element"); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext &ctx) const override { + auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Grad"); + return framework::OpKernelType(data_type, ctx.device_context()); + } + + framework::OpKernelType GetKernelTypeForVar( + const std::string &var_name, const framework::Tensor &tensor, + const framework::OpKernelType &expected_kernel_type) const { + if (var_name == "LearningRate") { + return framework::OpKernelType(tensor.type(), tensor.place(), + tensor.layout()); + } + return framework::OpKernelType(expected_kernel_type.data_type_, + tensor.place(), tensor.layout()); + } +}; + +class LargeScaleFuseAdamOpInferVarType : public framework::VarTypeInference { + public: + void operator()(framework::InferVarTypeContext *ctx) const override { + auto in_var_type = ctx->GetInputType("Grad"); + PADDLE_ENFORCE_EQ(in_var_type == framework::proto::VarType::SELECTED_ROWS || + in_var_type == framework::proto::VarType::LOD_TENSOR, + true, platform::errors::InvalidArgument( + "The input Var's type should be LoDtensor or " + "SelectedRows, but the received type is %s", + in_var_type)); + } +}; + +class LargeScaleFuseAdamOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("Grad", + "(SelectedRows) Ids's type should be SelectedRows" + "THe ids to be looked up in W."); + + AddInput("Beta1Pow", "(Tensor) Input beta1 power accumulator"); + AddInput("Beta2Pow", "(Tensor) Input beta2 power accumulator"); + AddInput("LearningRate", "(Tensor) Learning rate of SGD"); + AddOutput("Beta1PowOut", "(Tensor) Output beta1 power accumulator"); + AddOutput("Beta2PowOut", "(Tensor) Output beta2 power accumulator"); + + AddAttr("beta1", + "(float, default 0.9) 
" + "Exponential decay rate for the " + "first moment estimates.") + .SetDefault(0.9f); + + AddAttr("beta2", + "(float, default 0.999) " + "exponential decay rate for the " + "second moment estimates.") + .SetDefault(0.999f); + + AddAttr("epsilon", + "(float, default 1.0e-8) " + "Constant for numerical stability") + .SetDefault(1.0e-8f); + + AddAttr("is_entry", + "(bool)" + "sparse table need entry"); + + AddAttr("tablename", + "(string)" + "sparse table name"); + + AddAttr>("value_names", + "(strings)" + "sparse table name"); + + AddComment(R"DOC( +Adam Optimizer. + +This implements the Adam optimizer from Section 2 of the Adam +paper : https://arxiv.org/abs/1412.6980. +Adam is a first-order gradient-based optimization method based on +adaptive estimates of lower-order moments. + +Adam updates: + +$$ +moment\_1\_out = \beta_1 * moment\_1 + (1 - \beta_1) * grad \\ +moment\_2_\out = \beta_2 * moment\_2 + (1 - \beta_2) * grad * grad \\ +learning\_rate = learning\_rate * + \frac{\sqrt{1 - \beta_{2\_pow}}}{1 - \beta_{1\_pow}} \\ +param\_out = param - learning\_rate * \frac{moment\_1}{\sqrt{moment\_2} + \epsilon} +$$ + +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR( + lookup_sparse_table_fuse_adam, ops::LargeScaleFuseAdamOp, + ops::LargeScaleFuseAdamOpMaker, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker, + ops::LargeScaleFuseAdamOpInferVarType); + +REGISTER_OP_CPU_KERNEL( + lookup_sparse_table_fuse_adam, + ops::LargeScaleFuseAdamOpKernel); diff --git a/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.h b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.h new file mode 100644 index 00000000000..89b8d54a463 --- /dev/null +++ b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.h @@ -0,0 +1,142 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include // for sqrt in CPU and CUDA +#include +#include +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/selected_rows.h" +#include "paddle/fluid/operators/distributed/large_scale_kv.h" +#include "paddle/fluid/operators/math/blas.h" +#include "paddle/fluid/operators/math/selected_rows_functor.h" + +namespace paddle { +namespace operators { + +template +class LargeScaleFuseAdamOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override; +}; + +template +class LargeScaleFuseAdamOpKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + using paddle::framework::LoDTensor; + + const auto *learning_rate = ctx.Input("LearningRate"); + const auto *grad_var = ctx.InputVar("Grad"); + + PADDLE_ENFORCE( + grad_var->IsType(), + platform::errors::InvalidArgument( + "in large scale optimize, gradient should only be SelectedRows")); + + const auto &grad = grad_var->Get(); + + // for distributed training, a sparse var may be empty, + // just skip updating. 
+ if (grad.rows().size() == 0) { + return; + } + + framework::SelectedRows tmp_grad_merge; + const framework::SelectedRows *grad_merge_ptr; + math::scatter::MergeAdd merge_func; + merge_func(ctx.template device_context(), grad, + &tmp_grad_merge, true); + grad_merge_ptr = &tmp_grad_merge; + + std::vector in_rows; + in_rows.reserve(grad_merge_ptr->rows().size()); + std::copy(grad_merge_ptr->rows().begin(), grad_merge_ptr->rows().end(), + std::back_inserter(in_rows)); + + const auto *lr = learning_rate->data(); + auto grad_v = grad_merge_ptr->value(); + auto grad_width = grad_v.dims()[1]; + + // auto is_entry = context.Attr("is_entry"); + auto tablename = ctx.Attr("tablename"); + auto value_names = ctx.Attr>("value_names"); + + auto *beta1_pow = ctx.Input("Beta1Pow"); + auto *beta2_pow = ctx.Input("Beta2Pow"); + auto *beta1_pow_out = ctx.Output("Beta1PowOut"); + auto *beta2_pow_out = ctx.Output("Beta2PowOut"); + T epsilon = static_cast(ctx.Attr("epsilon")); + T beta1 = static_cast(ctx.Attr("beta1")); + T beta2 = static_cast(ctx.Attr("beta2")); + + PADDLE_ENFORCE_EQ(beta1_pow_out->numel(), 1, + platform::errors::InvalidArgument( + "beta1 pow output size should be 1, but received " + "value is:%d.", + beta1_pow_out->numel())); + + PADDLE_ENFORCE_EQ(beta2_pow_out->numel(), 1, + platform::errors::InvalidArgument( + "beta2 pow output size should be 1, but received " + "value is:%d.", + beta2_pow_out->numel())); + + // update beta1 and beta2 + beta1_pow_out->mutable_data(ctx.GetPlace())[0] = + beta1 * beta1_pow->data()[0]; + beta2_pow_out->mutable_data(ctx.GetPlace())[0] = + beta2 * beta2_pow->data()[0]; + + std::vector *>> values; + std::vector dims; + + auto *ins = distributed::LargeScaleKV::GetInstance(); + auto *table = ins->Get(tablename); + table->Get(in_rows, value_names, &values); + table->Dims({"Param"}, &dims); + + PADDLE_ENFORCE_EQ(dims[0], grad_width, + platform::errors::InvalidArgument( + "param_row should have the same size with grad_row")); + + T lr_ = 
lr[0]; + T beta1_pow_ = beta1_pow->data()[0]; + T beta2_pow_ = beta2_pow->data()[0]; + + lr_ *= sqrt(1 - beta2_pow_) / (1 - beta1_pow_); + + for (size_t i = 0; i < in_rows.size(); i++) { + auto ¶ms = values[i][0]; + auto &moment_1 = values[i][1]; + auto &moment_2 = values[i][2]; + + auto *p_data = params->data(); + auto *m1_data = moment_1->data(); + auto *m2_data = moment_2->data(); + + for (int x = 0; x < grad_width; ++x) { + auto g = grad_v.data()[grad_width * i + x]; + m1_data[x] = beta1 * m1_data[x] + (1 - beta1) * g; + m2_data[x] = beta2 * m2_data[x] + (1 - beta2) * g * g; + p_data[x] -= lr_ * (m1_data[x] / (sqrt(m2_data[x]) + epsilon)); + } + } + } +}; +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.cc b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.cc new file mode 100644 index 00000000000..010658b5280 --- /dev/null +++ b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.cc @@ -0,0 +1,120 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.h" + +#include +namespace paddle { +namespace operators { + +class LargeScaleFuseSGDOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Grad"), + "Input(Grad) of LargeScaleFuseSGDOp should not be null."); + PADDLE_ENFORCE( + ctx->HasInput("LearningRate"), + "Input(LearningRate) of LargeScaleFuseSGDOp should not be null."); + + auto lr_dims = ctx->GetInputDim("LearningRate"); + + PADDLE_ENFORCE_NE(framework::product(lr_dims), 0, + "Maybe the Input variable LearningRate has not " + "been initialized. You may need to confirm " + "if you put exe.run(startup_program) " + "after optimizer.minimize function."); + + PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, + "Learning rate should have 1 element"); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext &ctx) const override { + auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "Grad"); + return framework::OpKernelType(data_type, ctx.device_context()); + } + + framework::OpKernelType GetKernelTypeForVar( + const std::string &var_name, const framework::Tensor &tensor, + const framework::OpKernelType &expected_kernel_type) const { + if (var_name == "LearningRate") { + return framework::OpKernelType(tensor.type(), tensor.place(), + tensor.layout()); + } + return framework::OpKernelType(expected_kernel_type.data_type_, + tensor.place(), tensor.layout()); + } +}; + +class LargeScaleFuseSGDOpInferVarType : public framework::VarTypeInference { + public: + void operator()(framework::InferVarTypeContext *ctx) const override { + auto in_var_type = ctx->GetInputType("Grad"); + PADDLE_ENFORCE_EQ(in_var_type == framework::proto::VarType::SELECTED_ROWS || + in_var_type == framework::proto::VarType::LOD_TENSOR, + true, 
platform::errors::InvalidArgument( + "The input Var's type should be LoDtensor or " + "SelectedRows, but the received type is %s", + in_var_type)); + } +}; + +class LargeScaleFuseSGDOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("Grad", + "(SelectedRows) Ids's type should be SelectedRows" + "THe ids to be looked up in W."); + AddInput("LearningRate", "(Tensor) Learning rate of SGD"); + AddAttr("is_entry", + "(bool)" + "sparse table need entry"); + + AddAttr("tablename", + "(string)" + "sparse table name"); + + AddAttr>("value_names", + "(strings)" + "sparse table name"); + + AddComment(R"DOC( + +LargeScaleFuseSGD operator + +This operator implements one step of the stochastic gradient descent algorithm. + +$$param\_out = param - learning\_rate * grad$$ + +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR( + lookup_sparse_table_fuse_sgd, ops::LargeScaleFuseSGDOp, + ops::LargeScaleFuseSGDOpMaker, + paddle::framework::EmptyGradOpMaker, + paddle::framework::EmptyGradOpMaker, + ops::LargeScaleFuseSGDOpInferVarType); + +REGISTER_OP_CPU_KERNEL( + lookup_sparse_table_fuse_sgd, + ops::LargeScaleFuseSGDOpKernel); diff --git a/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.h b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.h new file mode 100644 index 00000000000..5d4bf1015fa --- /dev/null +++ b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_sgd_op.h @@ -0,0 +1,105 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include + +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/selected_rows.h" +#include "paddle/fluid/operators/distributed/large_scale_kv.h" +#include "paddle/fluid/operators/math/blas.h" +#include "paddle/fluid/operators/math/selected_rows_functor.h" + +namespace paddle { +namespace operators { + +template +class LargeScaleFuseSGDOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override; +}; + +template +class LargeScaleFuseSGDOpKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + const auto *learning_rate = ctx.Input("LearningRate"); + + const auto *grad_var = ctx.InputVar("Grad"); + + PADDLE_ENFORCE( + grad_var->IsType(), + platform::errors::InvalidArgument( + "in large scale optimize, gradient should only be SelectedRows")); + + const auto &grad = grad_var->Get(); + + // for distributed training, a sparse var may be empty, + // just skip updating. 
+ if (grad.rows().size() == 0) { + return; + } + + framework::SelectedRows tmp_grad_merge; + const framework::SelectedRows *grad_merge_ptr; + math::scatter::MergeAdd merge_func; + merge_func(ctx.template device_context(), grad, + &tmp_grad_merge, true); + grad_merge_ptr = &tmp_grad_merge; + + std::vector in_rows; + in_rows.reserve(grad_merge_ptr->rows().size()); + std::copy(grad_merge_ptr->rows().begin(), grad_merge_ptr->rows().end(), + std::back_inserter(in_rows)); + + const auto *lr = learning_rate->data(); + auto grad_v = grad_merge_ptr->value(); + auto grad_width = grad_v.dims()[1]; + + // auto is_entry = context.Attr("is_entry"); + auto tablename = ctx.Attr("tablename"); + auto value_names = ctx.Attr>("value_names"); + + std::vector *>> values; + std::vector dims; + + auto *ins = distributed::LargeScaleKV::GetInstance(); + auto *table = ins->Get(tablename); + table->Get(in_rows, value_names, &values); + table->Dims({"Param"}, &dims); + + PADDLE_ENFORCE_EQ(dims[0], grad_width, + platform::errors::InvalidArgument( + "param_row should have the same size with grad_row")); + + auto blas = math::GetBlas(ctx); + + std::vector grads; + framework::TensorToVector(grad_v, ctx.device_context(), &grads); + + blas.SCAL(grads.size(), lr[0], grads.data()); + + for (int x = 0; x < static_cast(in_rows.size()); ++x) { + auto ¶ms = values[x][0]; + blas.VSUB(grad_width, params->data(), grads.data() + grad_width * x, + params->data()); + } + } +}; +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/distributed_ops/recv_op.cc b/paddle/fluid/operators/distributed_ops/recv_op.cc index 15b36baeada..2547ba3acb1 100644 --- a/paddle/fluid/operators/distributed_ops/recv_op.cc +++ b/paddle/fluid/operators/distributed_ops/recv_op.cc @@ -37,12 +37,6 @@ class RecvOp : public framework::OperatorBase { void RunImpl(const framework::Scope &scope, const platform::Place &place) const override { - int do_not_run = Attr("do_not_run"); - if (do_not_run) { - VLOG(3) << 
"recv do not run!"; - return; - } - std::vector epmap = Attr>("epmap"); std::vector varnames = Attr>("varnames"); @@ -63,11 +57,10 @@ class RecvOp : public framework::OperatorBase { if (recv_varnames.size() > 0) { auto *communicator = distributed::Communicator::GetInstance(); - if (communicator == nullptr) { + if (communicator != nullptr) { PADDLE_THROW(platform::errors::InvalidArgument( - "need run fleet.init_worker first")); + "execute startup program must before fleet.init_worker")); } - communicator->RecvNoBarrier(); } else { std::vector rets; if (with_barrier) { diff --git a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py index ae5c53b8a37..6dd4661f000 100644 --- a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py +++ b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py @@ -220,12 +220,12 @@ class ParameterServerRuntime(RuntimeBase): else: model_dirname = None - if self.role_maker._is_heter_worker(): - self._init_worker() - executor = self._get_executor() executor.run(fluid.default_startup_program()) + if self.role_maker._is_heter_worker(): + self._init_worker() + if self.role_maker._is_heter_worker(): return diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/__init__.py b/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/__init__.py index 236cb458be4..e556a98ed75 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/__init__.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/distribute_transpiler/__init__.py @@ -191,12 +191,14 @@ class FleetTranspiler(Fleet): self._communicator = Communicator( trainer_config.mode, kwargs, trainer_config.get_communicator_flags()) + self._communicator.init_with_ctx(send_ctx, recv_ctx) if not self._communicator.is_running(): self._communicator.start() else: - warnings.warn("communicator has been 
initialized, skip") + raise ValueError( + "Communicator can only be inited once, please check") def init_worker(self): """ diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/pserver_pass.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/pserver_pass.py index 05deff10a2e..a60c4e149f5 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/pserver_pass.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/pserver_pass.py @@ -624,6 +624,7 @@ def large_scale_sparse_pass(program, main_program, config, is_startup=False): value_dims = [] grad = None opt_idx = -1 + fuse = False for op in block.ops: opt_idx += 1 @@ -631,6 +632,9 @@ def large_scale_sparse_pass(program, main_program, config, is_startup=False): if op.type not in opt_value_map.keys(): continue + if op.type in ["sgd", "adam"]: + fuse = True + grad = main_program.global_block().vars[op.input("Grad")[0]] for value in opt_value_map[op.type]: @@ -644,7 +648,67 @@ def large_scale_sparse_pass(program, main_program, config, is_startup=False): if value_names: break - return grad, opt_idx, value_names, value_dims, acture_names + return grad, opt_idx, value_names, value_dims, acture_names, fuse + + def add_fuse_large_scale_op(block, global_block, table_name, value_names, + acture_names, grad, is_entry, opt_idx): + + op = block.ops[opt_idx] + + if op.type == "sgd": + grad = main_program.global_block().vars[op.input("Grad")[0]] + lr = main_program.global_block().vars[op.input("LearningRate")[0]] + + block._insert_op( + opt_idx, + type="lookup_sparse_table_fuse_sgd", + inputs={"Grad": grad, + "LearningRate": lr}, + attrs={ + "is_entry": is_entry, + "tablename": table_name, + "value_names": value_names + }) + + elif op.type == "adam": + grad = main_program.global_block().vars[op.input("Grad")[0]] + lr = main_program.global_block().vars[op.input("LearningRate")[0]] + beta1_pow = main_program.global_block().vars[op.input("Beta1Pow")[ + 0]] + beta2_pow = 
main_program.global_block().vars[op.input("Beta2Pow")[ + 0]] + beta1_pow_o = main_program.global_block().vars[op.output( + "Beta1PowOut")[0]] + beta2_pow_o = main_program.global_block().vars[op.output( + "Beta2PowOut")[0]] + + beta1 = op.attr('beta1') + beta2 = op.attr('beta2') + epsilon = op.attr('epsilon') + + block._insert_op( + opt_idx, + type="lookup_sparse_table_fuse_adam", + inputs={ + "Grad": grad, + "LearningRate": lr, + "Beta1Pow": beta1_pow, + "Beta2Pow": beta2_pow + }, + outputs={ + "Beta1PowOut": beta1_pow_o, + "Beta2PowOut": beta2_pow_o + }, + attrs={ + "beta1": beta1, + "beta2": beta2, + "epsilon": epsilon, + "is_entry": is_entry, + "tablename": table_name, + "value_names": value_names + }) + else: + raise ValueError("only support sgd/adam optimizer now") def add_large_scale_op(block, global_block, table_name, value_names, acture_names, grad, is_entry, opt_idx): @@ -711,24 +775,35 @@ def large_scale_sparse_pass(program, main_program, config, is_startup=False): for param, blockid in param_blockid_map.items(): opt_block = program.block(blockid) - grad, opt_idx, value_names, value_dims, acture_names = \ + grad, opt_idx, value_names, value_dims, acture_names, fuse = \ get_optimizer_values(opt_block) entry_attr = get_entry_attr(param) is_entry = False if entry_attr == "none" else True - add_large_scale_op(opt_block, - program.global_block(), param, value_names, - acture_names, grad, is_entry, opt_idx) + if fuse: + add_fuse_large_scale_op(opt_block, + program.global_block(), param, + value_names, acture_names, grad, + is_entry, opt_idx) + else: + add_large_scale_op(opt_block, + program.global_block(), param, value_names, + acture_names, grad, is_entry, opt_idx) else: large_scale_kv_metas = [] for param, blockid in param_blockid_map.items(): opt_block = main_program.block(blockid) - grad, _, value_names, value_dims, acture_names = \ + + grad, opt_idx, value_names, value_dims, acture_names, fuse = \ get_optimizer_values(opt_block) entry_attr = 
get_entry_attr(param) + if fuse: + # remove origin optimzier op + opt_block._remove_op(opt_idx) + # training/infer mode = "0" names_str = ",".join(value_names) diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py index 4543af9820e..3f826da3ae2 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py @@ -227,22 +227,6 @@ def init_from_server_pass(program, config): fetch_barrier_out = program.global_block().create_var( name=framework.generate_control_dev_var_name()) - recv_ctx = config.get_communicator_recv_context(recv_type=1) - recv_varnames = [] - - for name, ctxs in recv_ctx.items(): - recv_varnames.extend(ctxs.origin_varnames()) - - program.global_block().append_op( - type="recv", - inputs={"X": []}, - outputs={"Out": []}, - attrs={ - "recv_varnames": recv_varnames, - "trainer_id": config.get_role_id(), - RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE - }) - program.global_block().append_op( type="fetch_barrier", inputs={}, diff --git a/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py b/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py index 60378aa9827..06a90b78fd2 100644 --- a/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py +++ b/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py @@ -164,8 +164,8 @@ def train(args): elif fleet.is_worker(): logger.info("run trainer") - fleet.init_worker() exe.run(fleet.startup_program) + fleet.init_worker() thread_num = 2 filelist = [] diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py index 8277499fcce..5721445c414 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py @@ -163,8 +163,10 @@ class TestDistCTR2x2(FleetDistRunnerBase): """ exe = 
fluid.Executor(fluid.CPUPlace()) - fleet.init_worker() + exe.run(fluid.default_startup_program()) + fleet.init_worker() + batch_size = 4 train_reader = paddle.batch(fake_ctr_reader(), batch_size=batch_size) self.reader.decorate_sample_list_generator(train_reader) @@ -202,8 +204,8 @@ class TestDistCTR2x2(FleetDistRunnerBase): exe = fluid.Executor(fluid.CPUPlace()) - fleet.init_worker() exe.run(fluid.default_startup_program()) + fleet.init_worker() thread_num = 2 batch_size = 128 diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py index 0e3c8099277..3852b225234 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py @@ -60,8 +60,9 @@ class TestDistGpuPsCTR2x2(TestDistCTR2x2): device_id = int(os.getenv("FLAGS_selected_gpus", "0")) place = fluid.CUDAPlace(device_id) exe = fluid.Executor(place) - fleet.init_worker() + exe.run(fleet.startup_program) + fleet.init_worker() batch_size = 4 train_reader = paddle.batch(fake_ctr_reader(), batch_size=batch_size) @@ -104,8 +105,8 @@ class TestDistGpuPsCTR2x2(TestDistCTR2x2): place = fluid.CUDAPlace(device_id) exe = fluid.Executor(place) - fleet.init_worker() exe.run(fleet.startup_program) + fleet.init_worker() thread_num = 2 batch_size = 128 diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py index 2f938a813d8..470fb98d799 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py @@ -152,8 +152,9 @@ class TestHeterPsCTR2x2(FleetDistHeterRunnerBase): """ exe = fluid.Executor(fluid.CPUPlace()) - fleet.init_worker() exe.run(fluid.default_startup_program()) + fleet.init_worker() + batch_size = 4 train_reader = paddle.batch(fake_ctr_reader(), batch_size=batch_size) 
self.reader.decorate_sample_list_generator(train_reader) @@ -176,8 +177,8 @@ class TestHeterPsCTR2x2(FleetDistHeterRunnerBase): exe = fluid.Executor(fluid.CPUPlace()) - fleet.init_worker() exe.run(fluid.default_startup_program()) + fleet.init_worker() thread_num = int(os.getenv("CPU_NUM", 2)) batch_size = 128 diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py index 2ea69e1b676..ff848488739 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py @@ -222,8 +222,8 @@ class TestDistSimnetBow2x2(FleetDistRunnerBase): """ exe = fluid.Executor(fluid.CPUPlace()) - fleet.init_worker() exe.run(fluid.default_startup_program()) + fleet.init_worker() batch_size = 4 # reader train_reader = paddle.batch(fake_simnet_reader(), batch_size=batch_size) diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py index 77697896b4d..81530573a60 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py @@ -151,8 +151,9 @@ class TestDistCTR2x2(FleetDistRunnerBase): """ exe = fluid.Executor(fluid.CPUPlace()) - fleet.init_worker() + exe.run(fluid.default_startup_program()) + fleet.init_worker() batch_size = 4 diff --git a/python/paddle/fluid/tests/unittests/test_communicator_async.py b/python/paddle/fluid/tests/unittests/test_communicator_async.py index d032d6d75b5..a86b80b2cf9 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_async.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_async.py @@ -30,11 +30,10 @@ from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distribu class TestCommunicator(unittest.TestCase): def net(self): - x = fluid.layers.data(name='x', shape=[13], 
dtype='float32') - y_predict = fluid.layers.fc(input=x, size=1, act=None) + x = fluid.layers.data(name='x', shape=[1], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = fluid.layers.square_error_cost(input=x, label=y) avg_cost = fluid.layers.mean(cost) return avg_cost diff --git a/python/paddle/fluid/tests/unittests/test_communicator_geo.py b/python/paddle/fluid/tests/unittests/test_communicator_geo.py index d9fc9262b31..5916000fba7 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_geo.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_geo.py @@ -83,8 +83,8 @@ class TestCommunicatorGeoEnd2End(unittest.TestCase): optimizer = fleet.distributed_optimizer(optimizer, strategy) optimizer.minimize(avg_cost) - fleet.init_worker() exe.run(fluid.default_startup_program()) + fleet.init_worker() train_reader = paddle.batch(self.fake_reader(), batch_size=24) feeder = fluid.DataFeeder(place=place, feed_list=[x, z, y]) diff --git a/python/paddle/fluid/tests/unittests/test_communicator_half_async.py b/python/paddle/fluid/tests/unittests/test_communicator_half_async.py index 391588780f3..b0f55f2939d 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_half_async.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_half_async.py @@ -71,8 +71,8 @@ class TestCommunicatorHalfAsyncEnd2End(unittest.TestCase): optimizer = fleet.distributed_optimizer(optimizer, strategy) optimizer.minimize(avg_cost) - fleet.init_worker() exe.run(fleet.startup_program) + fleet.init_worker() train_reader = paddle.batch(self.fake_reader(), batch_size=24) feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) diff --git a/python/paddle/fluid/tests/unittests/test_communicator_sync.py b/python/paddle/fluid/tests/unittests/test_communicator_sync.py index c0044d9d620..95b209b1460 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_sync.py +++ 
b/python/paddle/fluid/tests/unittests/test_communicator_sync.py @@ -27,11 +27,9 @@ import paddle.distributed.fleet as fleet class TestCommunicator(unittest.TestCase): def net(self): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') - y_predict = fluid.layers.fc(input=x, size=1, act=None) + x = fluid.layers.data(name='x', shape=[1], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') - - cost = fluid.layers.square_error_cost(input=y_predict, label=y) + cost = fluid.layers.square_error_cost(input=x, label=y) avg_cost = fluid.layers.mean(cost) return avg_cost diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py index a82612b0ed2..7f55e956a94 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py @@ -44,16 +44,11 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): paddle.fluid.framework.switch_startup_program(startup_program) fleet.init(role_maker.PaddleCloudRoleMaker()) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') - input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') - fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') - fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') - prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) - avg_cost = paddle.fluid.layers.mean(x=cost) + x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32') + y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') + cost = paddle.fluid.layers.square_error_cost(input=x, label=y) + avg_cost = paddle.fluid.layers.mean(cost) strategy = paddle.distributed.fleet.DistributedStrategy() strategy.a_sync = True @@ -71,7 +66,7 @@ 
class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): sends += 1 if op.type == "sgd": sgds += 1 - self.assertEqual(sends, 7) + self.assertEqual(sends, 1) self.assertEqual(sgds, 0) fleet.init_worker() @@ -89,16 +84,11 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): paddle.fluid.framework.switch_startup_program(startup_program) fleet.init(role_maker.PaddleCloudRoleMaker()) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') - input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') - - fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') - fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') - prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) - avg_cost = paddle.fluid.layers.mean(x=cost) + + x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32') + y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') + cost = paddle.fluid.layers.square_error_cost(input=x, label=y) + avg_cost = paddle.fluid.layers.mean(cost) strategy = paddle.distributed.fleet.DistributedStrategy() strategy.a_sync = True diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py index b05a53c88bb..db3f2afb366 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py @@ -36,16 +36,11 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): def test_gradient_merge_optimizer(self): fleet.init(role_maker.PaddleCloudRoleMaker()) - input_x = paddle.fluid.layers.data( - name="x", shape=[32], dtype='float32') - input_y = paddle.fluid.layers.data(name="y", shape=[1], dtype='int64') - fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') - fc_2 = 
paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') - prediction = paddle.fluid.layers.fc(input=[fc_2], size=2, act='softmax') - cost = paddle.fluid.layers.cross_entropy( - input=prediction, label=input_y) - avg_cost = paddle.fluid.layers.mean(x=cost) + x = paddle.fluid.layers.data(name='x', shape=[1], dtype='float32') + y = paddle.fluid.layers.data(name='y', shape=[1], dtype='float32') + cost = paddle.fluid.layers.square_error_cost(input=x, label=y) + avg_cost = paddle.fluid.layers.mean(cost) strategy = paddle.distributed.fleet.DistributedStrategy() strategy.a_sync = False @@ -63,7 +58,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): sends += 1 if op.type == "sgd": sgds += 1 - self.assertEqual(sends, 6) + self.assertEqual(sends, 0) self.assertEqual(sgds, 0) fleet.init_worker() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py index 379bcaf684d..6fe52ba9fe6 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py @@ -70,15 +70,13 @@ class TestPSPassWithBow(unittest.TestCase): q = fluid.layers.data( name="query_ids", shape=[1], dtype="int64", lod_level=1) # embedding - q_emb = fluid.layers.embedding( + q_emb = fluid.contrib.layers.sparse_embedding( input=q, - is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.01), name="__emb__", - learning_rate=emb_lr), - is_sparse=is_sparse) + learning_rate=emb_lr)) q_emb = fluid.layers.reshape(q_emb, [-1, emb_dim]) # vsum q_sum = fluid.layers.sequence_pool(input=q_emb, pool_type='sum') @@ -97,15 +95,13 @@ class TestPSPassWithBow(unittest.TestCase): pt = fluid.layers.data( name="pos_title_ids", shape=[1], dtype="int64", lod_level=1) # embedding - pt_emb = fluid.layers.embedding( + pt_emb = fluid.contrib.layers.sparse_embedding( input=pt, - is_distributed=is_distributed, 
size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.01), name="__emb__", - learning_rate=emb_lr), - is_sparse=is_sparse) + learning_rate=emb_lr)) pt_emb = fluid.layers.reshape(pt_emb, [-1, emb_dim]) # vsum pt_sum = fluid.layers.sequence_pool(input=pt_emb, pool_type='sum') @@ -123,15 +119,13 @@ class TestPSPassWithBow(unittest.TestCase): nt = fluid.layers.data( name="neg_title_ids", shape=[1], dtype="int64", lod_level=1) # embedding - nt_emb = fluid.layers.embedding( + nt_emb = fluid.contrib.layers.sparse_embedding( input=nt, - is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.01), name="__emb__", - learning_rate=emb_lr), - is_sparse=is_sparse) + learning_rate=emb_lr)) nt_emb = fluid.layers.reshape(nt_emb, [-1, emb_dim]) # vsum nt_sum = fluid.layers.sequence_pool(input=nt_emb, pool_type='sum') @@ -167,7 +161,7 @@ class TestPSPassWithBow(unittest.TestCase): fleet.init(role) loss, acc, _ = self.net() - optimizer = fluid.optimizer.SGD(base_lr) + optimizer = fluid.optimizer.Adam(base_lr) strategy = StrategyFactory.create_async_strategy() optimizer = fleet.distributed_optimizer(optimizer, strategy) optimizer.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py index fd069793473..c570c4d8cd0 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py @@ -168,12 +168,13 @@ class TestPSPassWithBow(unittest.TestCase): fleet.init(role) loss, acc, _ = self.net() - optimizer = fluid.optimizer.SGD( + optimizer = fluid.optimizer.Adagrad( learning_rate=fluid.layers.exponential_decay( learning_rate=base_lr, decay_steps=500, decay_rate=0.969, staircase=True)) + strategy = StrategyFactory.create_async_strategy() optimizer = fleet.distributed_optimizer(optimizer, strategy) 
optimizer.minimize(loss) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py new file mode 100644 index 00000000000..d5b1284e3ce --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py @@ -0,0 +1,168 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import paddle.fluid as fluid +import paddle.fluid.incubate.fleet.base.role_maker as role_maker +from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet +from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory + +# For Net +base_lr = 0.2 +emb_lr = base_lr * 3 +dict_dim = 1500 +emb_dim = 128 +hid_dim = 128 +margin = 0.1 +sample_rate = 1 +batch_size = 4 + + +class TestPSPassWithBow(unittest.TestCase): + def net(self): + def get_acc(cos_q_nt, cos_q_pt, batch_size): + cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) + cond = fluid.layers.cast(cond, dtype='float64') + cond_3 = fluid.layers.reduce_sum(cond) + acc = fluid.layers.elementwise_div( + cond_3, + fluid.layers.fill_constant( + shape=[1], value=batch_size * 1.0, dtype='float64'), + name="simnet_acc") + return acc + + def get_loss(cos_q_pt, cos_q_nt): + loss_op1 = fluid.layers.elementwise_sub( + fluid.layers.fill_constant_batch_size_like( + input=cos_q_pt, + 
shape=[-1, 1], + value=margin, + dtype='float32'), + cos_q_pt) + loss_op2 = fluid.layers.elementwise_add(loss_op1, cos_q_nt) + loss_op3 = fluid.layers.elementwise_max( + fluid.layers.fill_constant_batch_size_like( + input=loss_op2, shape=[-1, 1], value=0.0, dtype='float32'), + loss_op2) + avg_cost = fluid.layers.mean(loss_op3) + return avg_cost + + is_distributed = False + is_sparse = True + + # query + q = fluid.layers.data( + name="query_ids", shape=[1], dtype="int64", lod_level=1) + # embedding + q_emb = fluid.contrib.layers.sparse_embedding( + input=q, + size=[dict_dim, emb_dim], + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.01), + name="__emb__", + learning_rate=emb_lr)) + q_emb = fluid.layers.reshape(q_emb, [-1, emb_dim]) + # vsum + q_sum = fluid.layers.sequence_pool(input=q_emb, pool_type='sum') + q_ss = fluid.layers.softsign(q_sum) + # fc layer after conv + q_fc = fluid.layers.fc( + input=q_ss, + size=hid_dim, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.01), + name="__q_fc__", + learning_rate=base_lr)) + # label data + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + # pt + pt = fluid.layers.data( + name="pos_title_ids", shape=[1], dtype="int64", lod_level=1) + # embedding + pt_emb = fluid.contrib.layers.sparse_embedding( + input=pt, + size=[dict_dim, emb_dim], + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.01), + name="__emb__", + learning_rate=emb_lr)) + pt_emb = fluid.layers.reshape(pt_emb, [-1, emb_dim]) + # vsum + pt_sum = fluid.layers.sequence_pool(input=pt_emb, pool_type='sum') + pt_ss = fluid.layers.softsign(pt_sum) + # fc layer + pt_fc = fluid.layers.fc( + input=pt_ss, + size=hid_dim, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.01), + name="__fc__", + learning_rate=base_lr), + bias_attr=fluid.ParamAttr(name="__fc_b__")) + # nt + nt = fluid.layers.data( + name="neg_title_ids", shape=[1], 
dtype="int64", lod_level=1) + # embedding + nt_emb = fluid.contrib.layers.sparse_embedding( + input=nt, + size=[dict_dim, emb_dim], + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.01), + name="__emb__", + learning_rate=emb_lr)) + nt_emb = fluid.layers.reshape(nt_emb, [-1, emb_dim]) + # vsum + nt_sum = fluid.layers.sequence_pool(input=nt_emb, pool_type='sum') + nt_ss = fluid.layers.softsign(nt_sum) + # fc layer + nt_fc = fluid.layers.fc( + input=nt_ss, + size=hid_dim, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Constant(value=0.01), + name="__fc__", + learning_rate=base_lr), + bias_attr=fluid.ParamAttr(name="__fc_b__")) + cos_q_pt = fluid.layers.cos_sim(q_fc, pt_fc) + cos_q_nt = fluid.layers.cos_sim(q_fc, nt_fc) + # loss + avg_cost = get_loss(cos_q_pt, cos_q_nt) + # acc + acc = get_acc(cos_q_nt, cos_q_pt, batch_size) + return [avg_cost, acc, cos_q_pt] + + def test(self): + endpoints = [ + "127.0.0.1:36004", "127.0.0.1:36005", "127.0.0.1:36006", + "127.0.0.1:36007" + ] + + role = role_maker.UserDefinedRoleMaker( + current_id=0, + role=role_maker.Role.SERVER, + worker_num=2, + server_endpoints=endpoints) + + fleet.init(role) + loss, acc, _ = self.net() + optimizer = fluid.optimizer.Adagrad(base_lr) + strategy = StrategyFactory.create_async_strategy() + optimizer = fleet.distributed_optimizer(optimizer, strategy) + optimizer.minimize(loss) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_lookup_sparse_table_fuse_ops.py b/python/paddle/fluid/tests/unittests/test_dist_lookup_sparse_table_fuse_ops.py new file mode 100644 index 00000000000..bca91c536ba --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_lookup_sparse_table_fuse_ops.py @@ -0,0 +1,171 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np + +import paddle.fluid as fluid +import paddle.fluid.core as core + + +class TestLookupTableFuseOp(unittest.TestCase): + def test_fuse(self): + places = [core.CPUPlace()] + # currently only support CPU + for place in places: + self.check_with_place(place) + + def check_with_place(self, place): + scope = fluid.global_scope() + scope.var("LearningRate").get_tensor().set([0.01], place) + scope.var("Ids").get_tensor().set([i for i in range(100)], place) + + init_program = fluid.Program() + + lr = init_program.global_block().create_var( + name="LearningRate", + persistable=True, + type=fluid.core.VarDesc.VarType.LOD_TENSOR, + shape=[1], + dtype="float32") + + ids = init_program.global_block().create_var( + name="Ids", + persistable=True, + type=fluid.core.VarDesc.VarType.LOD_TENSOR, + shape=[100], + dtype="int64") + + output = init_program.global_block().create_var( + name="output", + type=fluid.core.VarDesc.VarType.LOD_TENSOR, + shape=[100, 8], + dtype="float32") + + metas = [] + metas.append( + "embedding_1.block0:Param,Moment1,Moment2:8,8,8:0:embedding_1@GRAD.block0:embedding_1.block0,embedding_1_moment1_0,embedding_1_moment2_0,kSparseIDs@embedding_1.block0:uniform_random&0&-0.5&0.5,fill_constant&0.0,fill_constant&0.0:none" + ) + metas.append( + "embedding_2.block0:Param:8:0:embedding_2@GRAD.block0:embedding_2.block0,kSparseIDs@embedding_2.block0:uniform_random&0&-0.5&0.5:none" + ) + + init_program.global_block().append_op( + type="lookup_sparse_table_init", + inputs=None, + 
outputs=None, + attrs={"large_scale_metas": metas}) + + init_program.global_block().append_op( + type="lookup_sparse_table_read", + inputs={"Ids": ids}, + outputs={"Out": output}, + attrs={ + "tablename": "embedding_1.block0", + "init": True, + "value_names": ["Param"], + }) + + init_program.global_block().append_op( + type="lookup_sparse_table_read", + inputs={"Ids": ids}, + outputs={"Out": output}, + attrs={ + "tablename": "embedding_2.block0", + "init": True, + "value_names": ["Param"], + }) + + executor = fluid.Executor(place) + executor.run(init_program) + + training_program = fluid.Program() + + scope.var('Beta1Pow').get_tensor().set( + np.array([0]).astype("float32"), place) + scope.var('Beta2Pow').get_tensor().set( + np.array([0]).astype("float32"), place) + + rows = [0, 1, 2, 3, 4, 5, 6] + row_numel = 8 + w_selected_rows = scope.var('Grad').get_selected_rows() + w_selected_rows.set_height(len(rows)) + w_selected_rows.set_rows(rows) + w_array = np.ones((len(rows), row_numel)).astype("float32") + for i in range(len(rows)): + w_array[i] *= i + w_tensor = w_selected_rows.get_tensor() + w_tensor.set(w_array, place) + + lr = training_program.global_block().create_var( + name="LearningRate", + persistable=True, + type=fluid.core.VarDesc.VarType.LOD_TENSOR, + shape=[1], + dtype="float32") + + grads = training_program.global_block().create_var( + name="Grad", + persistable=True, + type=fluid.core.VarDesc.VarType.SELECTED_ROWS, + shape=[100, 8], + dtype="float32") + + beta1 = training_program.global_block().create_var( + name="Beta1Pow", + persistable=True, + type=fluid.core.VarDesc.VarType.LOD_TENSOR, + shape=[1], + dtype="float32") + + beta2 = training_program.global_block().create_var( + name="Beta2Pow", + persistable=True, + type=fluid.core.VarDesc.VarType.LOD_TENSOR, + shape=[1], + dtype="float32") + + training_program.global_block().append_op( + type="lookup_sparse_table_fuse_adam", + inputs={ + "Grad": grads, + "LearningRate": lr, + "Beta1Pow": beta1, + 
"Beta2Pow": beta2, + }, + outputs={"Beta1PowOut": beta1, + "Beta2PowOut": beta2}, + attrs={ + "is_entry": False, + "tablename": "embedding_1.block0", + "value_names": ["Param", "Moment1", "Moment2"], + }) + + training_program.global_block().append_op( + type="lookup_sparse_table_fuse_sgd", + inputs={"Grad": grads, + "LearningRate": lr}, + attrs={ + "is_entry": False, + "tablename": "embedding_2.block0", + "value_names": ["Param"], + }) + + executor.run(training_program) + + +if __name__ == "__main__": + unittest.main() -- GitLab From 5508c7874492de3e95a2925d214225b6c8558747 Mon Sep 17 00:00:00 2001 From: LutaoChu <30695251+LutaoChu@users.noreply.github.com> Date: Wed, 23 Sep 2020 17:11:01 +0800 Subject: [PATCH 052/117] Fix bug: The calculation result of Diag_v2 Op under large size input is wrong (#27447) The calculation result of Diag_v2 Op under large size input is wrong --- paddle/fluid/operators/diag_v2_op.cu | 44 ++++++++------- .../paddle/fluid/tests/unittests/test_diag.py | 55 ++++++++++++++++++- 2 files changed, 77 insertions(+), 22 deletions(-) diff --git a/paddle/fluid/operators/diag_v2_op.cu b/paddle/fluid/operators/diag_v2_op.cu index 4386cc6b818..12ea31945f8 100644 --- a/paddle/fluid/operators/diag_v2_op.cu +++ b/paddle/fluid/operators/diag_v2_op.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include <algorithm> +#include <tuple> #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/diag_v2_op.h" @@ -58,6 +59,17 @@ class DiagV2CUDAKernel : public framework::OpKernel<T> { auto out_dims = out->dims(); auto& dev_ctx = context.template device_context<DeviceContext>(); + auto GetBlockGridSize = [&dev_ctx](int64_t size) { + const int64_t block_size = + std::min(size, static_cast<int64_t>(dev_ctx.GetMaxThreadsPerBlock())); + int64_t max_threads = dev_ctx.GetMaxPhysicalThreadCount(); + const int64_t max_blocks = std::max(((max_threads - 1) / block_size + 1), + static_cast<int64_t>(1)); + const int64_t grid_size = + std::min(max_blocks, (size + block_size - 1) / block_size); + return std::tuple<int64_t, int64_t>{block_size, grid_size}; + }; + if (x_dims.size() == 1) { float padding_value = context.Attr<float>("padding_value"); math::SetConstant<DeviceContext, T> set_padding_value; @@ -67,26 +79,23 @@ class DiagV2CUDAKernel : public framework::OpKernel<T> { auto size = (offset > 0) ? x_length + offset : x_length - offset; const int& x_stride = ComputeStride(0, x_dims); if (size > 0) { - const int block_num = std::min(static_cast<int>(size), - dev_ctx.GetMaxPhysicalThreadCount()); - int size_ = static_cast<int>(size); - int block_num_ = static_cast<int>(block_num); - const int grid_num = - std::min(1024, (size_ + block_num_ - 1) / block_num_); const auto& out_stride_0 = ComputeStride(0, out_dims); const auto& out_stride_1 = ComputeStride(1, out_dims); auto start = (offset >= 0 ? 
offset * out_stride_1 : -offset * out_stride_0); - PasteDiagonalKernel<T><<<grid_num, block_num, 0, dev_ctx.stream()>>>( - out_data, x_data, start, x_length, out_stride_0 + out_stride_1, - x_stride); + std::tuple<int64_t, int64_t> block_grid_size = GetBlockGridSize(size); + + PasteDiagonalKernel< + T><<<std::get<1>(block_grid_size), std::get<0>(block_grid_size), 0, + dev_ctx.stream()>>>(out_data, x_data, start, x_length, + out_stride_0 + out_stride_1, x_stride); } } else { const int& x_stride_0 = ComputeStride(0, x_dims); const int& x_stride_1 = ComputeStride(1, x_dims); - int size; + int64_t size; if (offset > 0) { size = std::min(x_dims[0], x_dims[1] - offset); } else { @@ -94,18 +103,15 @@ class DiagV2CUDAKernel : public framework::OpKernel<T> { } if (size > 0) { - const int block_num = std::min(static_cast<int>(size), - dev_ctx.GetMaxPhysicalThreadCount()); - int size_ = static_cast<int>(size); - int block_num_ = static_cast<int>(block_num); - const int grid_num = - std::min(1024, (size_ + block_num_ - 1) / block_num_); auto start = (offset >= 0 ? offset * x_stride_1 : -offset * x_stride_0); const auto& out_stride_0 = ComputeStride(0, out_dims); - ExtractDiagonalKernel<T><<<grid_num, block_num, 0, dev_ctx.stream()>>>( - out_data, x_data, start, size, x_stride_0 + x_stride_1, - out_stride_0); + std::tuple<int64_t, int64_t> block_grid_size = GetBlockGridSize(size); + + ExtractDiagonalKernel< + T><<<std::get<1>(block_grid_size), std::get<0>(block_grid_size), 0, + dev_ctx.stream()>>>(out_data, x_data, start, size, + x_stride_0 + x_stride_1, out_stride_0); } } } diff --git a/python/paddle/fluid/tests/unittests/test_diag.py b/python/paddle/fluid/tests/unittests/test_diag.py index 780d57b5331..ddf1240e4ef 100644 --- a/python/paddle/fluid/tests/unittests/test_diag.py +++ b/python/paddle/fluid/tests/unittests/test_diag.py @@ -119,6 +119,16 @@ class TestDiagV2API(unittest.TestCase): (n, n)) + np.diag(self.input_np3, self.offset) - np.diag( self.padding_value * np.ones(n)) + self.input_np4 = np.random.random(size=(2000, 2000)).astype(np.float32) + self.expected6 = np.diag(self.input_np4) + self.expected7 = np.diag(self.input_np4, k=1) + 
self.expected8 = np.diag(self.input_np4, k=-1) + + self.input_np5 = np.random.random(size=(2000)).astype(np.float32) + self.expected9 = np.diag(self.input_np5) + self.expected10 = np.diag(self.input_np5, k=1) + self.expected11 = np.diag(self.input_np5, k=-1) + def run_imperative(self): x = paddle.to_tensor(self.input_np) y = paddle.diag(x) @@ -141,10 +151,32 @@ class TestDiagV2API(unittest.TestCase): y = paddle.diag(x, padding_value=-8) self.assertTrue(np.allclose(y.numpy(), self.expected5)) + x = paddle.to_tensor(self.input_np4) + y = paddle.diag(x) + self.assertTrue(np.allclose(y.numpy(), self.expected6)) + + y = paddle.diag(x, offset=1) + self.assertTrue(np.allclose(y.numpy(), self.expected7)) + + y = paddle.diag(x, offset=-1) + self.assertTrue(np.allclose(y.numpy(), self.expected8)) + + x = paddle.to_tensor(self.input_np5) + y = paddle.diag(x) + self.assertTrue(np.allclose(y.numpy(), self.expected9)) + + y = paddle.diag(x, offset=1) + self.assertTrue(np.allclose(y.numpy(), self.expected10)) + + y = paddle.diag(x, offset=-1) + self.assertTrue(np.allclose(y.numpy(), self.expected11)) + def run_static(self, use_gpu=False): x = paddle.data(name='input', shape=[10, 10], dtype='float32') x2 = paddle.data(name='input2', shape=[100], dtype='float64') x3 = paddle.data(name='input3', shape=[100], dtype='int64') + x4 = paddle.data(name='input4', shape=[2000, 2000], dtype='float32') + x5 = paddle.data(name='input5', shape=[2000], dtype='float32') result0 = paddle.diag(x) result1 = paddle.diag(x, offset=1) result2 = paddle.diag(x, offset=-1) @@ -152,17 +184,28 @@ class TestDiagV2API(unittest.TestCase): result4 = paddle.diag(x2, padding_value=8) result5 = paddle.diag(x3, padding_value=8.0) result6 = paddle.diag(x3, padding_value=-8) + result7 = paddle.diag(x4) + result8 = paddle.diag(x4, offset=1) + result9 = paddle.diag(x4, offset=-1) + result10 = paddle.diag(x5) + result11 = paddle.diag(x5, offset=1) + result12 = paddle.diag(x5, offset=-1) place = fluid.CUDAPlace(0) if 
use_gpu else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - res0, res1, res2, res4, res5, res6 = exe.run( + res0, res1, res2, res4, res5, res6, res7, res8, res9, res10, res11, res12 = exe.run( feed={ "input": self.input_np, "input2": self.input_np2, - 'input3': self.input_np3 + 'input3': self.input_np3, + 'input4': self.input_np4, + 'input5': self.input_np5 }, - fetch_list=[result0, result1, result2, result4, result5, result6]) + fetch_list=[ + result0, result1, result2, result4, result5, result6, result7, + result8, result9, result10, result11, result12 + ]) self.assertTrue(np.allclose(res0, self.expected0)) self.assertTrue(np.allclose(res1, self.expected1)) @@ -171,6 +214,12 @@ class TestDiagV2API(unittest.TestCase): self.assertTrue(np.allclose(res4, self.expected3)) self.assertTrue(np.allclose(res5, self.expected4)) self.assertTrue(np.allclose(res6, self.expected5)) + self.assertTrue(np.allclose(res7, self.expected6)) + self.assertTrue(np.allclose(res8, self.expected7)) + self.assertTrue(np.allclose(res9, self.expected8)) + self.assertTrue(np.allclose(res10, self.expected9)) + self.assertTrue(np.allclose(res11, self.expected10)) + self.assertTrue(np.allclose(res12, self.expected11)) def test_cpu(self): paddle.disable_static(place=paddle.fluid.CPUPlace()) -- GitLab From 1e1ae5c54d31f6167c644d769d09e188495f0816 Mon Sep 17 00:00:00 2001 From: Zhou Wei <52485244+zhouwei25@users.noreply.github.com> Date: Wed, 23 Sep 2020 19:06:50 +0800 Subject: [PATCH 053/117] Make the Bind Method of Tensor more automatic (#27270) * Makes the Bind Method more intelligent * Makes the Bind Method more intelligent * fix unittest * fix unittest * fix conflict --- paddle/fluid/pybind/imperative.cc | 62 +++++------ python/paddle/fluid/dygraph/math_op_patch.py | 101 +++++------------- python/paddle/fluid/layers/math_op_patch.py | 32 ++---- .../tests/unittests/rnn/test_rnn_cells.py | 14 +-- .../tests/unittests/rnn/test_rnn_nets.py | 26 ++--- 
.../unittests/test_math_op_patch_var_base.py | 88 ++++++++++++--- .../fluid/tests/unittests/test_minimum_op.py | 8 +- .../fluid/tests/unittests/test_mse_loss.py | 9 +- .../fluid/tests/unittests/test_nll_loss.py | 12 +-- .../unittests/test_nn_margin_rank_loss.py | 18 ++-- .../tests/unittests/test_nn_sigmoid_op.py | 4 +- .../fluid/tests/unittests/test_numel_op.py | 4 +- .../fluid/tests/unittests/test_ones_like.py | 2 +- .../tests/unittests/test_pairwise_distance.py | 4 +- .../fluid/tests/unittests/test_sort_op.py | 4 +- .../fluid/tests/unittests/test_tile_op.py | 6 +- .../tests/unittests/test_transformer_api.py | 76 ++++++------- .../fluid/tests/unittests/test_warpctc_op.py | 16 +-- python/paddle/tensor/manipulation.py | 2 +- 19 files changed, 229 insertions(+), 259 deletions(-) diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 489dd198876..da9900e2b27 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -649,61 +649,47 @@ void BindImperative(py::module *m_ptr) { return self.NewVarBase(tensor.place(), false); }, py::return_value_policy::copy, R"DOC( - **Notes**: - **This API is ONLY available in Dygraph mode** - Returns a new Variable, detached from the current graph. - - Returns: - ( :ref:`api_guide_Variable_en` | dtype is same as current Variable): The detached Variable. + Returns a new Tensor, detached from the current graph. + Returns: The detached Tensor. Examples: .. 
code-block:: python - import paddle.fluid as fluid - from paddle.fluid.dygraph.base import to_variable - from paddle.fluid.dygraph import Linear - import numpy as np - - data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32') - with fluid.dygraph.guard(): - linear = Linear(32, 64) - data = to_variable(data) - x = linear(data) - y = x.detach() + import paddle + paddle.disable_static() + linear = paddle.nn.Linear(32, 64) + data = paddle.uniform(shape=[30, 10, 32], min=-1, max=1) + x = linear(data) + y = x.detach() )DOC") .def("clear_gradient", &imperative::VarBase::ClearGradient, R"DOC( - **Notes**: - **1. This API is ONLY available in Dygraph mode** - - **2. Use it only Variable has gradient, normally we use this for Parameters since other temporal Variable will be deleted by Python's GC** + Only for Tensor that has gradient, normally we use this for Parameters since other temporary Tensor doesn't have gradient. - Clear (set to ``0`` ) the Gradient of Current Variable + The Gradient of current Tensor will be set to ``0`` . Returns: None Examples: .. 
code-block:: python - import paddle.fluid as fluid - import numpy as np - - x = np.ones([2, 2], np.float32) - with fluid.dygraph.guard(): - inputs2 = [] - for _ in range(10): - tmp = fluid.dygraph.base.to_variable(x) - tmp.stop_gradient=False - inputs2.append(tmp) - ret2 = fluid.layers.sums(inputs2) - loss2 = fluid.layers.reduce_sum(ret2) - loss2.backward() - print(loss2.gradient()) - loss2.clear_gradient() - print("After clear {}".format(loss2.gradient())) + import paddle + paddle.disable_static() + + inputs = [] + for _ in range(10): + tmp = paddle.ones([2, 2]) + tmp.stop_gradient=False + inputs.append(tmp) + ret = paddle.sums(inputs) + loss = paddle.reduce_sum(ret) + loss.backward() + print("Before clear_gradient {}".format(loss.grad)) + loss.clear_gradient() + print("After clear_gradient {}".format(loss.grad)) )DOC") .def("_run_backward", [](imperative::VarBase &self, const imperative::Tracer &tracer, diff --git a/python/paddle/fluid/dygraph/math_op_patch.py b/python/paddle/fluid/dygraph/math_op_patch.py index 3aa7b9dfc26..68206f62860 100644 --- a/python/paddle/fluid/dygraph/math_op_patch.py +++ b/python/paddle/fluid/dygraph/math_op_patch.py @@ -17,8 +17,7 @@ from __future__ import print_function from .. import core from ..framework import Variable, convert_np_dtype_to_dtype_, _varbase_creator from ..layers.layer_function_generator import OpProtoHolder -from ..layers import common_methods -from . import to_variable, no_grad +from . import no_grad import numpy as np import six @@ -53,47 +52,25 @@ def monkey_patch_math_varbase(): def astype(self, dtype): """ - **Notes**: - **The variable must be a** :ref:`api_fluid_Tensor` - Cast a variable to a specified data type. + Cast a Tensor to a specified data type. Args: - - self(Variable): The source variable - - dtype: The target data type + dtype: The target data type. Returns: - Variable: Variable with new dtype + Tensor: a new Tensor with target dtype Examples: - In Static Graph Mode: - - .. 
code-block:: python - - import paddle.fluid as fluid - - startup_prog = fluid.Program() - main_prog = fluid.Program() - with fluid.program_guard(startup_prog, main_prog): - original_variable = fluid.data(name = "new_variable", shape=[2,2], dtype='float32') - new_variable = original_variable.astype('int64') - print("new var's dtype is: {}".format(new_variable.dtype)) - - In Dygraph Mode: - .. code-block:: python - import paddle.fluid as fluid + import paddle import numpy as np - x = np.ones([2, 2], np.float32) - with fluid.dygraph.guard(): - original_variable = fluid.dygraph.to_variable(x) - print("original var's dtype is: {}, numpy dtype is {}".format(original_variable.dtype, original_variable.numpy().dtype)) - new_variable = original_variable.astype('int64') - print("new var's dtype is: {}, numpy dtype is {}".format(new_variable.dtype, new_variable.numpy().dtype)) + original_tensor = paddle.ones([2, 2]) + print("original tensor's dtype is: {}".format(original_tensor.dtype)) + new_tensor = original_tensor.astype('float32') + print("new tensor's dtype is: {}".format(new_tensor.dtype)) """ if not isinstance(dtype, core.VarDesc.VarType): @@ -147,6 +124,10 @@ def monkey_patch_math_varbase(): def _ndim_(var): return len(var.shape) + @property + def _size_(var): + return np.prod(var.shape) + def _scalar_add_(var, value): return _scalar_elementwise_op_(var, 1.0, value) @@ -208,7 +189,6 @@ def monkey_patch_math_varbase(): __impl__.__doc__ = """ {0} Args: - self(Tensor): left hand Tensor other_var(Tensor|float|int): right hand Tensor Returns: @@ -217,23 +197,7 @@ def monkey_patch_math_varbase(): __impl__.__name__ = method_name return __impl__ - # Todo(zhouwei): implement dygraph template to adapt to any function, receive('op_type', 'arg_template') - # Such as _method_creator_('addmm', 'x, y, alpha=1.0, beta=1.0, name=None'). It can reduce call time. 
- def _method_creator_(op_type, arg_template=None): - def __impl__(self): - op = getattr(core.ops, op_type) - return op(self) - - __impl__.__doc__ = """ - - See paddle.{}""".format(op_type) - __impl__.__name__ = op_type - - return __impl__ - varbase_methods = [ - # Type1: From custom fun or lambda - ## b=-a ('__neg__', _neg_), ('__float__', _float_), ('__long__', _long_), @@ -244,8 +208,7 @@ def monkey_patch_math_varbase(): ('dim', lambda x: len(x.shape)), ('ndimension', lambda x: len(x.shape)), ('ndim', _ndim_), - ('size', lambda x: x.shape), - # Type2: From Template that create core.ops automatically. It's recommended. + ('size', _size_), ('__add__', _binary_creator_('__add__', 'elementwise_add', False, _scalar_add_)), ## a+b == b+a. Do not need to reverse explicitly @@ -283,31 +246,7 @@ def monkey_patch_math_varbase(): ('__le__', _binary_creator_('__le__', 'less_equal', False, None)), ('__gt__', _binary_creator_('__gt__', 'greater_than', False, None)), ('__ge__', _binary_creator_('__ge__', 'greater_equal', False, None)), - ('__array_ufunc__', None), - ('sigmoid', _method_creator_('sigmoid', 'name=None')), - ('log_sigmoid', _method_creator_('logsigmoid', 'name=None')), - ('exp', _method_creator_('exp', 'name=None')), - ('tanh', _method_creator_('tanh', 'name=None')), - ('atan', _method_creator_('atan', 'name=None')), - ('tanh_shrink', _method_creator_('tanh_shrink', 'name=None')), - ('sqrt', _method_creator_('sqrt', 'name=None')), - ('rsqrt', _method_creator_('rsqrt', 'name=None')), - ('abs', _method_creator_('abs', 'name=None')), - ('ceil', _method_creator_('ceil', 'name=None')), - ('floor', _method_creator_('floor', 'name=None')), - ('cos', _method_creator_('cos', 'name=None')), - ('acos', _method_creator_('acos', 'name=None')), - ('asin', _method_creator_('asin', 'name=None')), - ('sin', _method_creator_('sin', 'name=None')), - ('sinh', _method_creator_('sinh', 'name=None')), - ('cosh', _method_creator_('cosh', 'name=None')), - ('round', 
_method_creator_('round', 'name=None')), - ('reciprocal', _method_creator_('reciprocal', 'name=None')), - ('square', _method_creator_('square', 'name=None')), - ('softplus', _method_creator_('softplus', 'name=None')), - ('softsign', _method_creator_('softsign', 'name=None')), - # Type3: Form module 'paddle.tensor' defaultly. - # It's not a goodway, because it will increase call time. + ('__array_ufunc__', None) ] global _already_patch_varbase @@ -318,7 +257,15 @@ def monkey_patch_math_varbase(): setattr(core.VarBase, method_name, method_impl) else: import paddle.tensor - for method_name in common_methods: + # Tensor method from module paddle.tensor + tensor_methods = paddle.tensor.linalg.__all__ + \ + paddle.tensor.math.__all__ + \ + paddle.tensor.logic.__all__ + \ + paddle.tensor.manipulation.__all__ + \ + paddle.tensor.search.__all__ + \ + paddle.tensor.stat.__all__ + \ + paddle.tensor.attribute.__all__ + for method_name in tensor_methods: if hasattr(core.VarBase, method_name): continue method_impl = getattr(paddle.tensor, method_name, None) if method_impl: setattr(core.VarBase, method_name, method_impl) diff --git a/python/paddle/fluid/layers/math_op_patch.py b/python/paddle/fluid/layers/math_op_patch.py index 4595f0cf939..92b58a7e2ee 100644 --- a/python/paddle/fluid/layers/math_op_patch.py +++ b/python/paddle/fluid/layers/math_op_patch.py @@ -54,29 +54,6 @@ EXPRESSION_MAP = { "__ge__": "A >= B" } -# method for Tensor from paddle.tensor -# edit it when paddle.tensor has new method about Tensor operation -common_methods = [ - 'exp', 'tanh', 'atan', 'sqrt', 'rsqrt', 'abs', 'ceil', 'floor', 'cos', - 'acos', 'asin', 'sin', 'sinh', 'cosh', 'round', 'reciprocal', 'square', - 'rank', 'matmul', 'dot', 'norm', 'transpose', 'dist', 't', 'cross', - 'cholesky', 'bmm', 'histogram', 'equal', 'greater_equal', 'greater_than', - 'is_empty', 'isfinite', 'less_equal', 'less_than', 'logical_and', - 'logical_not', 'logical_or', 'logical_xor', 'not_equal', 'reduce_all', - 
'reduce_any', 'allclose', 'equal_all', 'cast', 'expand', 'expand_as', - 'tile', 'flatten', 'gather', 'gather_nd', 'reshape', 'reverse', 'scatter', - 'scatter_nd_add', 'scatter_nd', 'shard_index', 'slice', 'split', 'squeeze', - 'strided_slice', 'unique', 'unique_with_counts', 'unsqueeze', 'flip', - 'unbind', 'roll', 'cumsum', 'increment', 'log', 'pow', 'reciprocal', - 'round', 'rsqrt', 'scale', 'sign', 'stanh', 'sum', 'reduce_prod', 'max', - 'min', 'mm', 'div', 'multiply', 'add', 'logsumexp', 'log1p', 'erf', - 'addcmul', 'addmm', 'clamp', 'trace', 'kron', 'argmax', 'argmin', 'argsort', - 'has_inf', 'has_nan', 'topk', 'index_select', 'nonzero', 'sort', - 'index_sample', 'mean', 'std', 'var', 'elementwise_add', 'elementwise_div', - 'elementwise_floordiv', 'elementwise_mod', 'elementwise_pow', - 'elementwise_sub' -] - _already_patch_variable = False @@ -372,7 +349,14 @@ def monkey_patch_variable(): setattr(Variable, method_name, method_impl) else: import paddle.tensor - for method_name in common_methods: + variabel_methods = paddle.tensor.linalg.__all__ + \ + paddle.tensor.math.__all__ + \ + paddle.tensor.logic.__all__ + \ + paddle.tensor.manipulation.__all__ + \ + paddle.tensor.search.__all__ + \ + paddle.tensor.stat.__all__ + \ + paddle.tensor.attribute.__all__ + for method_name in variabel_methods: if hasattr(Variable, method_name): continue method_impl = getattr(paddle.tensor, method_name, None) if method_impl: setattr(Variable, method_name, method_impl) diff --git a/python/paddle/fluid/tests/unittests/rnn/test_rnn_cells.py b/python/paddle/fluid/tests/unittests/rnn/test_rnn_cells.py index 8d2677229a0..ab1127afa58 100644 --- a/python/paddle/fluid/tests/unittests/rnn/test_rnn_cells.py +++ b/python/paddle/fluid/tests/unittests/rnn/test_rnn_cells.py @@ -47,7 +47,7 @@ class TestSimpleRNNCell(unittest.TestCase): prev_h = np.random.randn(4, 32) y1, h1 = rnn1(x, prev_h) - y2, h2 = rnn2(paddle.to_variable(x), paddle.to_variable(prev_h)) + y2, h2 = rnn2(paddle.to_tensor(x), 
paddle.to_tensor(prev_h)) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) def test_with_zero_state(self): @@ -57,7 +57,7 @@ class TestSimpleRNNCell(unittest.TestCase): x = np.random.randn(4, 16) y1, h1 = rnn1(x) - y2, h2 = rnn2(paddle.to_variable(x)) + y2, h2 = rnn2(paddle.to_tensor(x)) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) def runTest(self): @@ -90,7 +90,7 @@ class TestGRUCell(unittest.TestCase): prev_h = np.random.randn(4, 32) y1, h1 = rnn1(x, prev_h) - y2, h2 = rnn2(paddle.to_variable(x), paddle.to_variable(prev_h)) + y2, h2 = rnn2(paddle.to_tensor(x), paddle.to_tensor(prev_h)) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) def test_with_zero_state(self): @@ -100,7 +100,7 @@ class TestGRUCell(unittest.TestCase): x = np.random.randn(4, 16) y1, h1 = rnn1(x) - y2, h2 = rnn2(paddle.to_variable(x)) + y2, h2 = rnn2(paddle.to_tensor(x)) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) def runTest(self): @@ -134,8 +134,8 @@ class TestLSTMCell(unittest.TestCase): y1, (h1, c1) = rnn1(x, (prev_h, prev_c)) y2, (h2, c2) = rnn2( - paddle.to_variable(x), - (paddle.to_variable(prev_h), paddle.to_variable(prev_c))) + paddle.to_tensor(x), + (paddle.to_tensor(prev_h), paddle.to_tensor(prev_c))) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(c1, c2.numpy(), atol=1e-8, rtol=1e-5) @@ -146,7 +146,7 @@ class TestLSTMCell(unittest.TestCase): x = np.random.randn(4, 16) y1, (h1, c1) = rnn1(x) - y2, (h2, c2) = rnn2(paddle.to_variable(x)) + y2, (h2, c2) = rnn2(paddle.to_tensor(x)) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(c1, c2.numpy(), atol=1e-8, rtol=1e-5) diff --git a/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets.py b/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets.py index ef297b3bb62..7c03b51837e 100644 --- a/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets.py +++ 
b/python/paddle/fluid/tests/unittests/rnn/test_rnn_nets.py @@ -53,7 +53,7 @@ class TestSimpleRNN(unittest.TestCase): prev_h = np.random.randn(2 * self.num_directions, 4, 32) y1, h1 = rnn1(x, prev_h) - y2, h2 = rnn2(paddle.to_variable(x), paddle.to_variable(prev_h)) + y2, h2 = rnn2(paddle.to_tensor(x), paddle.to_tensor(prev_h)) np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) @@ -66,7 +66,7 @@ class TestSimpleRNN(unittest.TestCase): x = np.transpose(x, [1, 0, 2]) y1, h1 = rnn1(x) - y2, h2 = rnn2(paddle.to_variable(x)) + y2, h2 = rnn2(paddle.to_tensor(x)) np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) @@ -81,11 +81,11 @@ class TestSimpleRNN(unittest.TestCase): y1, h1 = rnn1(x, sequence_length=sequence_length) - seq_len = paddle.to_variable(sequence_length) + seq_len = paddle.to_tensor(sequence_length) mask = sequence_mask(seq_len, dtype=paddle.get_default_dtype()) if self.time_major: mask = paddle.transpose(mask, [1, 0]) - y2, h2 = rnn2(paddle.to_variable(x), sequence_length=seq_len) + y2, h2 = rnn2(paddle.to_tensor(x), sequence_length=seq_len) y2 = paddle.multiply(y2, mask, axis=0) np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) @@ -133,7 +133,7 @@ class TestGRU(unittest.TestCase): prev_h = np.random.randn(2 * self.num_directions, 4, 32) y1, h1 = rnn1(x, prev_h) - y2, h2 = rnn2(paddle.to_variable(x), paddle.to_variable(prev_h)) + y2, h2 = rnn2(paddle.to_tensor(x), paddle.to_tensor(prev_h)) np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) @@ -146,7 +146,7 @@ class TestGRU(unittest.TestCase): x = np.transpose(x, [1, 0, 2]) y1, h1 = rnn1(x) - y2, h2 = rnn2(paddle.to_variable(x)) + y2, h2 = rnn2(paddle.to_tensor(x)) np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(h1, 
h2.numpy(), atol=1e-8, rtol=1e-5) @@ -161,11 +161,11 @@ class TestGRU(unittest.TestCase): y1, h1 = rnn1(x, sequence_length=sequence_length) - seq_len = paddle.to_variable(sequence_length) + seq_len = paddle.to_tensor(sequence_length) mask = sequence_mask(seq_len, dtype=paddle.get_default_dtype()) if self.time_major: mask = paddle.transpose(mask, [1, 0]) - y2, h2 = rnn2(paddle.to_variable(x), sequence_length=seq_len) + y2, h2 = rnn2(paddle.to_tensor(x), sequence_length=seq_len) y2 = paddle.multiply(y2, mask, axis=0) np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) @@ -209,8 +209,8 @@ class TestLSTM(unittest.TestCase): y1, (h1, c1) = rnn1(x, (prev_h, prev_c)) y2, (h2, c2) = rnn2( - paddle.to_variable(x), - (paddle.to_variable(prev_h), paddle.to_variable(prev_c))) + paddle.to_tensor(x), + (paddle.to_tensor(prev_h), paddle.to_tensor(prev_c))) np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(c1, c2.numpy(), atol=1e-8, rtol=1e-5) @@ -224,7 +224,7 @@ class TestLSTM(unittest.TestCase): x = np.transpose(x, [1, 0, 2]) y1, (h1, c1) = rnn1(x) - y2, (h2, c2) = rnn2(paddle.to_variable(x)) + y2, (h2, c2) = rnn2(paddle.to_tensor(x)) np.testing.assert_allclose(y1, y2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(h1, h2.numpy(), atol=1e-8, rtol=1e-5) np.testing.assert_allclose(c1, c2.numpy(), atol=1e-8, rtol=1e-5) @@ -240,11 +240,11 @@ class TestLSTM(unittest.TestCase): y1, (h1, c1) = rnn1(x, sequence_length=sequence_length) - seq_len = paddle.to_variable(sequence_length) + seq_len = paddle.to_tensor(sequence_length) mask = sequence_mask(seq_len, dtype=paddle.get_default_dtype()) if self.time_major: mask = paddle.transpose(mask, [1, 0]) - y2, (h2, c2) = rnn2(paddle.to_variable(x), sequence_length=seq_len) + y2, (h2, c2) = rnn2(paddle.to_tensor(x), sequence_length=seq_len) y2 = paddle.multiply(y2, mask, axis=0) np.testing.assert_allclose(y1, 
y2.numpy(), atol=1e-8, rtol=1e-5) diff --git a/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py b/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py index a70862f4019..5df04ddfc3d 100644 --- a/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_math_op_patch_var_base.py @@ -19,6 +19,7 @@ import paddle import paddle.fluid as fluid import numpy as np import six +import inspect class TestMathOpPatchesVarBase(unittest.TestCase): @@ -302,21 +303,13 @@ class TestMathOpPatchesVarBase(unittest.TestCase): self.assertEqual(x.dim(), 2) self.assertEqual(x.ndimension(), 2) self.assertEqual(x.ndim, 2) - self.assertEqual(x.size(), [2, 3]) - self.assertTrue( - np.array_equal(x.sigmoid().numpy(), fluid.layers.sigmoid(x).numpy( - ))) - self.assertTrue( - np.array_equal(x.log_sigmoid().numpy(), - fluid.layers.logsigmoid(x).numpy())) + self.assertEqual(x.size, 6) + self.assertEqual(x.numel(), 6) self.assertTrue(np.array_equal(x.exp().numpy(), paddle.exp(x).numpy())) self.assertTrue( np.array_equal(x.tanh().numpy(), paddle.tanh(x).numpy())) self.assertTrue( np.array_equal(x.atan().numpy(), paddle.atan(x).numpy())) - self.assertTrue( - np.array_equal(x.tanh_shrink().numpy(), - fluid.layers.tanh_shrink(x).numpy())) self.assertTrue(np.array_equal(x.abs().numpy(), paddle.abs(x).numpy())) m = x.abs() self.assertTrue( @@ -344,12 +337,6 @@ class TestMathOpPatchesVarBase(unittest.TestCase): ))) self.assertTrue( np.array_equal(x.square().numpy(), paddle.square(x).numpy())) - self.assertTrue( - np.array_equal(x.softplus().numpy(), - fluid.layers.softplus(x).numpy())) - self.assertTrue( - np.array_equal(x.softsign().numpy(), - fluid.layers.softsign(x).numpy())) self.assertTrue( np.array_equal(x.rank().numpy(), paddle.rank(x).numpy())) self.assertTrue( @@ -422,6 +409,8 @@ class TestMathOpPatchesVarBase(unittest.TestCase): self.assertTrue(np.array_equal(x.reciprocal(), paddle.reciprocal(x))) # 2. 
Binary operation + self.assertTrue( + np.array_equal(x.divide(y).numpy(), paddle.divide(x, y).numpy())) self.assertTrue( np.array_equal( x.matmul(y, True, False).numpy(), @@ -501,6 +490,73 @@ class TestMathOpPatchesVarBase(unittest.TestCase): self.assertTrue( np.array_equal( x.logical_and(y).numpy(), paddle.logical_and(x, y).numpy())) + a = paddle.to_tensor([[1, 2], [3, 4]]) + b = paddle.to_tensor([[4, 3], [2, 1]]) + self.assertTrue( + np.array_equal( + x.where(a, b).numpy(), paddle.where(x, a, b).numpy())) + + self.assertTrue(inspect.ismethod(a.dot)) + self.assertTrue(inspect.ismethod(a.elementwise_add)) + self.assertTrue(inspect.ismethod(a.elementwise_div)) + self.assertTrue(inspect.ismethod(a.elementwise_floordiv)) + self.assertTrue(inspect.ismethod(a.elementwise_mod)) + self.assertTrue(inspect.ismethod(a.elementwise_sub)) + self.assertTrue(inspect.ismethod(a.logsumexp)) + self.assertTrue(inspect.ismethod(a.multiplex)) + self.assertTrue(inspect.ismethod(a.prod)) + self.assertTrue(inspect.ismethod(a.reduce_max)) + self.assertTrue(inspect.ismethod(a.reduce_min)) + self.assertTrue(inspect.ismethod(a.reduce_prod)) + self.assertTrue(inspect.ismethod(a.reduce_sum)) + self.assertTrue(inspect.ismethod(a.scale)) + self.assertTrue(inspect.ismethod(a.stanh)) + self.assertTrue(inspect.ismethod(a.sums)) + self.assertTrue(inspect.ismethod(a.elementwise_sum)) + self.assertTrue(inspect.ismethod(a.max)) + self.assertTrue(inspect.ismethod(a.maximum)) + self.assertTrue(inspect.ismethod(a.min)) + self.assertTrue(inspect.ismethod(a.minimum)) + self.assertTrue(inspect.ismethod(a.floor_divide)) + self.assertTrue(inspect.ismethod(a.remainder)) + self.assertTrue(inspect.ismethod(a.floor_mod)) + self.assertTrue(inspect.ismethod(a.multiply)) + self.assertTrue(inspect.ismethod(a.logsumexp)) + self.assertTrue(inspect.ismethod(a.inverse)) + self.assertTrue(inspect.ismethod(a.log1p)) + self.assertTrue(inspect.ismethod(a.erf)) + self.assertTrue(inspect.ismethod(a.addcmul)) + 
self.assertTrue(inspect.ismethod(a.addmm)) + self.assertTrue(inspect.ismethod(a.clip)) + self.assertTrue(inspect.ismethod(a.trace)) + self.assertTrue(inspect.ismethod(a.kron)) + self.assertTrue(inspect.ismethod(a.isinf)) + self.assertTrue(inspect.ismethod(a.isnan)) + self.assertTrue(inspect.ismethod(a.concat)) + self.assertTrue(inspect.ismethod(a.broadcast_to)) + self.assertTrue(inspect.ismethod(a.scatter_nd_add)) + self.assertTrue(inspect.ismethod(a.scatter_nd)) + self.assertTrue(inspect.ismethod(a.shard_index)) + self.assertTrue(inspect.ismethod(a.chunk)) + self.assertTrue(inspect.ismethod(a.stack)) + self.assertTrue(inspect.ismethod(a.strided_slice)) + self.assertTrue(inspect.ismethod(a.unsqueeze)) + self.assertTrue(inspect.ismethod(a.unstack)) + self.assertTrue(inspect.ismethod(a.argmax)) + self.assertTrue(inspect.ismethod(a.argmin)) + self.assertTrue(inspect.ismethod(a.argsort)) + self.assertTrue(inspect.ismethod(a.has_inf)) + self.assertTrue(inspect.ismethod(a.has_nan)) + self.assertTrue(inspect.ismethod(a.masked_select)) + self.assertTrue(inspect.ismethod(a.topk)) + self.assertTrue(inspect.ismethod(a.index_select)) + self.assertTrue(inspect.ismethod(a.nonzero)) + self.assertTrue(inspect.ismethod(a.sort)) + self.assertTrue(inspect.ismethod(a.index_sample)) + self.assertTrue(inspect.ismethod(a.mean)) + self.assertTrue(inspect.ismethod(a.reduce_mean)) + self.assertTrue(inspect.ismethod(a.std)) + self.assertTrue(inspect.ismethod(a.numel)) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_minimum_op.py b/python/paddle/fluid/tests/unittests/test_minimum_op.py index 4c08b7386ca..a0673c82c5b 100644 --- a/python/paddle/fluid/tests/unittests/test_minimum_op.py +++ b/python/paddle/fluid/tests/unittests/test_minimum_op.py @@ -61,8 +61,8 @@ class ApiMinimumTest(unittest.TestCase): def test_dynamic_api(self): paddle.disable_static() np_x = np.array([10, 10]).astype('float64') - x = paddle.to_variable(self.input_x) - y = 
paddle.to_variable(self.input_y) + x = paddle.to_tensor(self.input_x) + y = paddle.to_tensor(self.input_y) z = paddle.minimum(x, y) np_z = z.numpy() z_expected = np.array(np.minimum(self.input_x, self.input_y)) @@ -73,8 +73,8 @@ class ApiMinimumTest(unittest.TestCase): np_x = np.random.rand(5, 4, 3, 2).astype("float64") np_y = np.random.rand(4, 3).astype("float64") - x = paddle.to_variable(self.input_x) - y = paddle.to_variable(self.input_y) + x = paddle.to_tensor(self.input_x) + y = paddle.to_tensor(self.input_y) result_1 = paddle.minimum(x, y, axis=1) result_2 = paddle.minimum(x, y, axis=-2) self.assertEqual((result_1.numpy() == result_2.numpy()).all(), True) diff --git a/python/paddle/fluid/tests/unittests/test_mse_loss.py b/python/paddle/fluid/tests/unittests/test_mse_loss.py index 753d96c4411..e327307e955 100644 --- a/python/paddle/fluid/tests/unittests/test_mse_loss.py +++ b/python/paddle/fluid/tests/unittests/test_mse_loss.py @@ -205,8 +205,7 @@ class TestNNFunctionalMseLoss(unittest.TestCase): paddle.disable_static() dy_ret = paddle.nn.functional.mse_loss( - paddle.to_variable(input_np), - paddle.to_variable(target_np), 'mean') + paddle.to_tensor(input_np), paddle.to_tensor(target_np), 'mean') dy_result = dy_ret.numpy() sub = input_np - target_np @@ -240,8 +239,7 @@ class TestNNFunctionalMseLoss(unittest.TestCase): paddle.disable_static() dy_ret = paddle.nn.functional.mse_loss( - paddle.to_variable(input_np), - paddle.to_variable(target_np), 'sum') + paddle.to_tensor(input_np), paddle.to_tensor(target_np), 'sum') dy_result = dy_ret.numpy() sub = input_np - target_np @@ -275,8 +273,7 @@ class TestNNFunctionalMseLoss(unittest.TestCase): paddle.disable_static() dy_ret = paddle.nn.functional.mse_loss( - paddle.to_variable(input_np), - paddle.to_variable(target_np), 'none') + paddle.to_tensor(input_np), paddle.to_tensor(target_np), 'none') dy_result = dy_ret.numpy() sub = input_np - target_np diff --git a/python/paddle/fluid/tests/unittests/test_nll_loss.py 
b/python/paddle/fluid/tests/unittests/test_nll_loss.py index e7154193bea..c07bf949af3 100644 --- a/python/paddle/fluid/tests/unittests/test_nll_loss.py +++ b/python/paddle/fluid/tests/unittests/test_nll_loss.py @@ -909,8 +909,8 @@ class TestNLLLossInvalidArgs(unittest.TestCase): with fluid.dygraph.guard(): x_np = np.random.random(size=(5, )).astype(np.float64) label_np = np.random.randint(0, 10, size=(5, )).astype(np.int64) - x = paddle.to_variable(x_np) - label = paddle.to_variable(label_np) + x = paddle.to_tensor(x_np) + label = paddle.to_tensor(label_np) nll_loss = paddle.nn.loss.NLLLoss() res = nll_loss(x, label) @@ -933,8 +933,8 @@ class TestNLLLossInvalidArgs(unittest.TestCase): with fluid.dygraph.guard(): x_np = np.random.random(size=(5, 3)).astype(np.float64) label_np = np.random.randint(0, 3, size=(5, )).astype(np.int64) - x = paddle.to_variable(x_np) - label = paddle.to_variable(label_np) + x = paddle.to_tensor(x_np) + label = paddle.to_tensor(label_np) nll_loss = paddle.nn.loss.NLLLoss(reduction='') res = nll_loss(x, label) @@ -957,8 +957,8 @@ class TestNLLLossInvalidArgs(unittest.TestCase): with fluid.dygraph.guard(): x_np = np.random.random(size=(5, 3)).astype(np.float64) label_np = np.random.randint(0, 3, size=(5, )).astype(np.int64) - x = paddle.to_variable(x_np) - label = paddle.to_variable(label_np) + x = paddle.to_tensor(x_np) + label = paddle.to_tensor(label_np) res = paddle.nn.functional.nll_loss(x, label, reduction='') self.assertRaises( diff --git a/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py b/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py index 0ebe769fb9b..8ee3b2ac203 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py +++ b/python/paddle/fluid/tests/unittests/test_nn_margin_rank_loss.py @@ -101,9 +101,9 @@ def create_test_case(margin, reduction): def run_dynamic_functional_api(self, place): paddle.disable_static(place) - x = paddle.to_variable(self.x_data) - y = 
paddle.to_variable(self.y_data) - label = paddle.to_variable(self.label_data) + x = paddle.to_tensor(self.x_data) + y = paddle.to_tensor(self.y_data) + label = paddle.to_tensor(self.label_data) result = paddle.nn.functional.margin_ranking_loss(x, y, label, margin, reduction) @@ -117,9 +117,9 @@ def create_test_case(margin, reduction): def run_dynamic_api(self, place): paddle.disable_static(place) - x = paddle.to_variable(self.x_data) - y = paddle.to_variable(self.y_data) - label = paddle.to_variable(self.label_data) + x = paddle.to_tensor(self.x_data) + y = paddle.to_tensor(self.y_data) + label = paddle.to_tensor(self.label_data) margin_rank_loss = paddle.nn.loss.MarginRankingLoss( margin=margin, reduction=reduction) result = margin_rank_loss(x, y, label) @@ -134,9 +134,9 @@ def create_test_case(margin, reduction): def run_dynamic_broadcast_api(self, place): paddle.disable_static(place) label_data = np.random.choice([-1, 1], size=[10]).astype("float64") - x = paddle.to_variable(self.x_data) - y = paddle.to_variable(self.y_data) - label = paddle.to_variable(label_data) + x = paddle.to_tensor(self.x_data) + y = paddle.to_tensor(self.y_data) + label = paddle.to_tensor(label_data) margin_rank_loss = paddle.nn.loss.MarginRankingLoss( margin=margin, reduction=reduction) result = margin_rank_loss(x, y, label) diff --git a/python/paddle/fluid/tests/unittests/test_nn_sigmoid_op.py b/python/paddle/fluid/tests/unittests/test_nn_sigmoid_op.py index d52a1f5d5b1..90132a0923d 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_sigmoid_op.py +++ b/python/paddle/fluid/tests/unittests/test_nn_sigmoid_op.py @@ -56,7 +56,7 @@ class TestNNSigmoidAPI(unittest.TestCase): def check_dynamic_api(self, place): paddle.disable_static(place) - x = paddle.to_variable(self.x) + x = paddle.to_tensor(self.x) mysigmoid = nn.Sigmoid() y = mysigmoid(x) self.assertTrue(np.allclose(y.numpy(), self.y)) @@ -94,7 +94,7 @@ class TestNNFunctionalSigmoidAPI(unittest.TestCase): def 
check_dynamic_api(self): paddle.disable_static() - x = paddle.to_variable(self.x) + x = paddle.to_tensor(self.x) y = functional.sigmoid(x) self.assertTrue(np.allclose(y.numpy(), self.y)) diff --git a/python/paddle/fluid/tests/unittests/test_numel_op.py b/python/paddle/fluid/tests/unittests/test_numel_op.py index 8512bc99e74..800706e5965 100644 --- a/python/paddle/fluid/tests/unittests/test_numel_op.py +++ b/python/paddle/fluid/tests/unittests/test_numel_op.py @@ -76,8 +76,8 @@ class TestNumelOoAPI(unittest.TestCase): paddle.disable_static(paddle.CPUPlace()) input_1 = np.random.random([2, 1, 4, 5]).astype("int32") input_2 = np.random.random([1, 4, 5]).astype("int32") - x_1 = paddle.to_variable(input_1) - x_2 = paddle.to_variable(input_2) + x_1 = paddle.to_tensor(input_1) + x_2 = paddle.to_tensor(input_2) out_1 = paddle.numel(x_1) out_2 = paddle.numel(x_2) assert (np.array_equal(out_1.numpy().item(0), np.size(input_1))) diff --git a/python/paddle/fluid/tests/unittests/test_ones_like.py b/python/paddle/fluid/tests/unittests/test_ones_like.py index c1e6a337771..bb0d6f07bdb 100644 --- a/python/paddle/fluid/tests/unittests/test_ones_like.py +++ b/python/paddle/fluid/tests/unittests/test_ones_like.py @@ -63,7 +63,7 @@ class TestOnesLikeImpeartive(unittest.TestCase): place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( ) else fluid.CPUPlace() paddle.disable_static(place) - x = paddle.to_variable(np.ones(shape)) + x = paddle.to_tensor(np.ones(shape)) for dtype in [np.bool, np.float32, np.float64, np.int32, np.int64]: out = ones_like(x, dtype) self.assertEqual((out.numpy() == np.ones(shape, dtype)).all(), True) diff --git a/python/paddle/fluid/tests/unittests/test_pairwise_distance.py b/python/paddle/fluid/tests/unittests/test_pairwise_distance.py index baf0efa6ec2..cf138e67726 100644 --- a/python/paddle/fluid/tests/unittests/test_pairwise_distance.py +++ b/python/paddle/fluid/tests/unittests/test_pairwise_distance.py @@ -48,8 +48,8 @@ def test_static(x_np, y_np, p=2.0, 
epsilon=1e-6, keepdim=False): def test_dygraph(x_np, y_np, p=2.0, epsilon=1e-6, keepdim=False): paddle.disable_static() - x = paddle.to_variable(x_np) - y = paddle.to_variable(y_np) + x = paddle.to_tensor(x_np) + y = paddle.to_tensor(y_np) dist = paddle.nn.layer.distance.PairwiseDistance( p=p, epsilon=epsilon, keepdim=keepdim) distance = dist(x, y) diff --git a/python/paddle/fluid/tests/unittests/test_sort_op.py b/python/paddle/fluid/tests/unittests/test_sort_op.py index 015b72fd1c5..366e0c7a3fa 100644 --- a/python/paddle/fluid/tests/unittests/test_sort_op.py +++ b/python/paddle/fluid/tests/unittests/test_sort_op.py @@ -72,14 +72,14 @@ class TestSortDygraph(unittest.TestCase): def test_api_0(self): paddle.disable_static(self.place) - var_x = paddle.to_variable(self.input_data) + var_x = paddle.to_tensor(self.input_data) out = paddle.sort(var_x) self.assertEqual((np.sort(self.input_data) == out.numpy()).all(), True) paddle.enable_static() def test_api_1(self): paddle.disable_static(self.place) - var_x = paddle.to_variable(self.input_data) + var_x = paddle.to_tensor(self.input_data) out = paddle.sort(var_x, axis=-1) self.assertEqual( (np.sort( diff --git a/python/paddle/fluid/tests/unittests/test_tile_op.py b/python/paddle/fluid/tests/unittests/test_tile_op.py index 5aaf3199344..b0f065a26a0 100644 --- a/python/paddle/fluid/tests/unittests/test_tile_op.py +++ b/python/paddle/fluid/tests/unittests/test_tile_op.py @@ -230,13 +230,13 @@ class TestTileAPI(unittest.TestCase): def test_api(self): with fluid.dygraph.guard(): np_x = np.random.random([12, 14]).astype("float32") - x = paddle.to_variable(np_x) + x = paddle.to_tensor(np_x) positive_2 = np.array([2]).astype("int32") - positive_2 = paddle.to_variable(positive_2) + positive_2 = paddle.to_tensor(positive_2) repeat_times = np.array([2, 3]).astype("int32") - repeat_times = paddle.to_variable(repeat_times) + repeat_times = paddle.to_tensor(repeat_times) out_1 = paddle.tile(x, repeat_times=[2, 3]) out_2 = paddle.tile(x, 
repeat_times=[positive_2, 3]) diff --git a/python/paddle/fluid/tests/unittests/test_transformer_api.py b/python/paddle/fluid/tests/unittests/test_transformer_api.py index bd76edc9d8c..7c7a71a3be1 100644 --- a/python/paddle/fluid/tests/unittests/test_transformer_api.py +++ b/python/paddle/fluid/tests/unittests/test_transformer_api.py @@ -234,23 +234,23 @@ class TestTransformer(unittest.TestCase): if cache_dict: if 'k' and 'v' in cache_dict: cache_obj = multi_head_attn.Cache( - paddle.to_variable(cache_dict['k']), - paddle.to_variable(cache_dict['v'])) + paddle.to_tensor(cache_dict['k']), + paddle.to_tensor(cache_dict['v'])) elif 'static_k' and 'static_v' in cache_dict: cache_obj = multi_head_attn.StaticCache( - paddle.to_variable(cache_dict['static_k']), - paddle.to_variable(cache_dict['static_v'])) + paddle.to_tensor(cache_dict['static_k']), + paddle.to_tensor(cache_dict['static_v'])) if attn_mask is not None: attn_output = multi_head_attn( - paddle.to_variable(query), - paddle.to_variable(key), - paddle.to_variable(value), - paddle.to_variable(attn_mask), cache_obj) + paddle.to_tensor(query), + paddle.to_tensor(key), + paddle.to_tensor(value), + paddle.to_tensor(attn_mask), cache_obj) else: attn_output = multi_head_attn( - paddle.to_variable(query), - paddle.to_variable(key), - paddle.to_variable(value), attn_mask, cache_obj) + paddle.to_tensor(query), + paddle.to_tensor(key), + paddle.to_tensor(value), attn_mask, cache_obj) attn_output = attn_output[0] if cache_dict else attn_output # implementation by numpy @@ -296,16 +296,16 @@ class TestTransformer(unittest.TestCase): attn_dropout, act_dropout) encoder_output = encoder_layer( - paddle.to_variable(src), - paddle.to_variable(src_mask)) # paddle.to_variable(src_mask)) + paddle.to_tensor(src), + paddle.to_tensor(src_mask)) # paddle.to_tensor(src_mask)) # 4.numpy: # paddle self attention self_attn = MultiHeadAttention( d_model, n_head, dropout=attn_dropout) attn_output = self_attn( - paddle.to_variable(src), - 
paddle.to_variable(src), - paddle.to_variable(src), paddle.to_variable(src_mask)).numpy() + paddle.to_tensor(src), + paddle.to_tensor(src), + paddle.to_tensor(src), paddle.to_tensor(src_mask)).numpy() src = attn_output + residual src_norm = layer_norm(src, d_model, encoder_layer.norm1) @@ -348,13 +348,13 @@ class TestTransformer(unittest.TestCase): cache_objs = None if cache: cache_objs = decoder_layer.gen_cache( - paddle.to_variable(memory)) + paddle.to_tensor(memory)) decoder_output = decoder_layer( - paddle.to_variable(tgt), - paddle.to_variable(memory), - paddle.to_variable(tgt_mask), - paddle.to_variable(memory_mask), cache_objs) + paddle.to_tensor(tgt), + paddle.to_tensor(memory), + paddle.to_tensor(tgt_mask), + paddle.to_tensor(memory_mask), cache_objs) decoder_output = decoder_output[0].numpy( ) if cache else decoder_output.numpy() @@ -365,10 +365,10 @@ class TestTransformer(unittest.TestCase): self_attn_cache = cache_objs[ 0] if cache_objs is not None else None tgt = self_attn( - paddle.to_variable(tgt), - paddle.to_variable(tgt), - paddle.to_variable(tgt), - paddle.to_variable(tgt_mask), self_attn_cache) + paddle.to_tensor(tgt), + paddle.to_tensor(tgt), + paddle.to_tensor(tgt), + paddle.to_tensor(tgt_mask), self_attn_cache) tgt = tgt[0].numpy() if cache else tgt.numpy() @@ -380,10 +380,10 @@ class TestTransformer(unittest.TestCase): cross_attn_cache = cache_objs[ 1] if cache_objs is not None else None tgt = cross_attn( - paddle.to_variable(tgt_norm), - paddle.to_variable(memory), - paddle.to_variable(memory), - paddle.to_variable(memory_mask), cross_attn_cache) + paddle.to_tensor(tgt_norm), + paddle.to_tensor(memory), + paddle.to_tensor(memory), + paddle.to_tensor(memory_mask), cross_attn_cache) tgt = tgt[0].numpy() if cache else tgt.numpy() # postprocess @@ -416,7 +416,7 @@ class TestTransformer(unittest.TestCase): encoder = TransformerEncoder(encoder_layer, num_layers) # src, src_mask enc_output = encoder( - paddle.to_variable(src), 
paddle.to_variable(src_mask)) + paddle.to_tensor(src), paddle.to_tensor(src_mask)) def test_decoder(self): batch_size, d_model, n_head, dim_feedforward, dropout, _, _, source_length, target_length = generate_basic_params( @@ -438,9 +438,9 @@ class TestTransformer(unittest.TestCase): decoder = TransformerDecoder(decoder_layer, num_layers) output = decoder( - paddle.to_variable(tgt), - paddle.to_variable(memory), - paddle.to_variable(tgt_mask), paddle.to_variable(memory_mask)) + paddle.to_tensor(tgt), + paddle.to_tensor(memory), + paddle.to_tensor(tgt_mask), paddle.to_tensor(memory_mask)) def test_transformer(self): batch_size, d_model, n_head, dim_feedforward, dropout, _, _, source_length, target_length = generate_basic_params( @@ -453,24 +453,24 @@ class TestTransformer(unittest.TestCase): n_head, dim_feedforward=dim_feedforward, dropout=dropout) - src = paddle.to_variable( + src = paddle.to_tensor( np.random.rand(batch_size, source_length, d_model).astype( "float32")) - tgt = paddle.to_variable( + tgt = paddle.to_tensor( np.random.rand(batch_size, target_length, d_model).astype( "float32")) src_mask = np.zeros((batch_size, n_head, source_length, source_length)).astype("float32") src_mask[0][0][0][0] = -np.inf - src_mask = paddle.to_variable(src_mask) + src_mask = paddle.to_tensor(src_mask) tgt_mask = np.zeros((batch_size, n_head, target_length, target_length)).astype("float32") tgt_mask[0][0][0][0] = -1e9 memory_mask = np.zeros((batch_size, n_head, target_length, source_length)).astype("float32") memory_mask[0][0][0][0] = -1e9 - tgt_mask, memory_mask = paddle.to_variable( - tgt_mask), paddle.to_variable(memory_mask) + tgt_mask, memory_mask = paddle.to_tensor( + tgt_mask), paddle.to_tensor(memory_mask) trans_output = transformer(src, tgt, src_mask, tgt_mask, memory_mask) diff --git a/python/paddle/fluid/tests/unittests/test_warpctc_op.py b/python/paddle/fluid/tests/unittests/test_warpctc_op.py index 6bc42f0712a..c4155e0d826 100644 --- 
a/python/paddle/fluid/tests/unittests/test_warpctc_op.py +++ b/python/paddle/fluid/tests/unittests/test_warpctc_op.py @@ -424,10 +424,10 @@ class TestCTCLossAPICase(unittest.TestCase): loss_np = ctc.forward() paddle.disable_static() - softmax = paddle.to_variable(logits) - labels = paddle.to_variable(labels) - logits_length = paddle.to_variable(self.logits_length) - labels_length = paddle.to_variable(self.labels_length) + softmax = paddle.to_tensor(logits) + labels = paddle.to_tensor(labels) + logits_length = paddle.to_tensor(self.logits_length) + labels_length = paddle.to_tensor(self.labels_length) loss_pd_mean = F.ctc_loss( softmax, labels, @@ -477,10 +477,10 @@ class TestCTCLossAPICase(unittest.TestCase): loss_np = ctc.forward() paddle.disable_static() - softmax = paddle.to_variable(logits) - labels = paddle.to_variable(labels) - logits_length = paddle.to_variable(self.logits_length) - labels_length = paddle.to_variable(self.labels_length) + softmax = paddle.to_tensor(logits) + labels = paddle.to_tensor(labels) + logits_length = paddle.to_tensor(self.logits_length) + labels_length = paddle.to_tensor(self.labels_length) loss_pd = paddle.nn.CTCLoss(self.blank, 'none')( softmax, labels, logits_length, labels_length) diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 9de407841fb..dc6a04a4723 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -53,7 +53,7 @@ __all__ = [ 'shard_index', 'slice', 'split', - 'chunk' + 'chunk', 'squeeze', 'stack', 'strided_slice', -- GitLab From c17f9cf25fd42ab868983a85c03d8c9a2b4a007d Mon Sep 17 00:00:00 2001 From: Shang Zhizhou Date: Wed, 23 Sep 2020 19:09:28 +0800 Subject: [PATCH 054/117] [bug fix]:Memory increases after adapting the cudnn version to cudnn8 (#27436) * [bug fix]:Memory increases after adapting the cudnn version to 8 * [bug fix]cudnnGetConvolutionForwardAlgorithm not defined --- paddle/fluid/operators/conv_cudnn_helper.h | 30 
++++++++++++++++++- .../fluid/operators/fused/conv_fusion_op.cu | 10 ++++++- paddle/fluid/platform/dynload/cudnn.cc | 8 +++++ paddle/fluid/platform/dynload/cudnn.h | 1 + 4 files changed, 47 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/operators/conv_cudnn_helper.h b/paddle/fluid/operators/conv_cudnn_helper.h index 25b45f281a7..fac8e242510 100644 --- a/paddle/fluid/operators/conv_cudnn_helper.h +++ b/paddle/fluid/operators/conv_cudnn_helper.h @@ -162,7 +162,20 @@ struct SearchAlgorithm { workspace_size = GetWorkspaceSize(args, algo); if (workspace_size > workspace_size_limit) { +#if CUDNN_VERSION >= 8000 workspace_size_limit = workspace_size; +#else + VLOG(1) << "Fallback to non-v7 method to find conv algorithm becasue " + "the workspace size request(" + << workspace_size << ") exceeds the limit(" + << workspace_size_limit << ")"; + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnGetConvolutionForwardAlgorithm( + args.handle, args.idesc.desc(), args.wdesc.desc(), + args.cdesc.desc(), args.odesc.desc(), + CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, + workspace_size_limit, &algo)); +#endif } #else PADDLE_ENFORCE_CUDA_SUCCESS( @@ -291,8 +304,23 @@ struct SearchAlgorithm { #endif workspace_size = GetWorkspaceSize(args, algo); if (workspace_size > workspace_size_limit) { - workspace_size_limit = workspace_size; has_got_workspace_size = false; +#if CUDNN_VERSION >= 8000 + // There is no cudnnGetConvolutionBackwardDataAlgorithm in CUDNN 8 + // version. 
+ workspace_size_limit = workspace_size; +#else + VLOG(1) << "Fallback to non-v7 method to find conv algorithm becasue " + "the workspace size request(" + << workspace_size << ") exceeds the limit(" + << workspace_size_limit << ")"; + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnGetConvolutionBackwardDataAlgorithm( + args.handle, args.wdesc.desc(), args.odesc.desc(), + args.cdesc.desc(), args.idesc.desc(), + CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, + workspace_size_limit, &algo)); +#endif } #else PADDLE_ENFORCE_CUDA_SUCCESS( diff --git a/paddle/fluid/operators/fused/conv_fusion_op.cu b/paddle/fluid/operators/fused/conv_fusion_op.cu index b22f28fbbe3..49fded886a0 100644 --- a/paddle/fluid/operators/fused/conv_fusion_op.cu +++ b/paddle/fluid/operators/fused/conv_fusion_op.cu @@ -204,6 +204,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { auto x_dims = framework::vectorize(transformed_input.dims()); auto f_dims = framework::vectorize(filter->dims()); if (!exhaustive_search) { +#if CUDNN_VERSION >= 8000 int perf_count; int best_algo_idx = 0; size_t tmp_size = 0; @@ -215,13 +216,20 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel { cudnn_output_desc, kNUM_CUDNN_FWD_ALGS, &perf_count, perf_results.get())); algo = (perf_results.get())[best_algo_idx].algo; - VLOG(3) << "cuDNN forward algo " << algo; PADDLE_ENFORCE_CUDA_SUCCESS( platform::dynload::cudnnGetConvolutionForwardWorkspaceSize( handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc, cudnn_output_desc, algo, &workspace_size_in_bytes)); if (workspace_size_in_bytes > workspace_size_limit) workspace_size_limit = workspace_size_in_bytes; +#else + PADDLE_ENFORCE_CUDA_SUCCESS( + platform::dynload::cudnnGetConvolutionForwardAlgorithm( + handle, cudnn_input_desc, cudnn_filter_desc, cudnn_conv_desc, + cudnn_output_desc, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, + workspace_size_limit, &algo)); + VLOG(3) << "cuDNN forward algo " << algo; +#endif } else { 
std::function search_func = [&]() -> cudnnConvolutionFwdAlgo_t { diff --git a/paddle/fluid/platform/dynload/cudnn.cc b/paddle/fluid/platform/dynload/cudnn.cc index 1166dc5e4ad..4c59fe5e9ba 100644 --- a/paddle/fluid/platform/dynload/cudnn.cc +++ b/paddle/fluid/platform/dynload/cudnn.cc @@ -30,6 +30,10 @@ CUDNN_DNN_ROUTINE_EACH_R2(DEFINE_WRAP); CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DEFINE_WRAP); #endif +#ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_R3_LESS_R8 +CUDNN_DNN_ROUTINE_EACH_AFTER_R3_LESS_R8(DEFINE_WRAP); +#endif + #ifdef CUDNN_DNN_ROUTINE_EACH_AFTER_R4 CUDNN_DNN_ROUTINE_EACH_AFTER_R4(DEFINE_WRAP); #endif @@ -54,6 +58,10 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_TWO_R7(DEFINE_WRAP); CUDNN_DNN_ROUTINE_EACH_AFTER_R7(DEFINE_WRAP); #endif +#ifdef CUDNN_DNN_ROUTINE_EACH_R8 +CUDNN_DNN_ROUTINE_EACH_R8(DEFINE_WRAP); +#endif + bool HasCUDNN() { std::call_once(cudnn_dso_flag, []() { cudnn_dso_handle = GetCUDNNDsoHandle(); }); diff --git a/paddle/fluid/platform/dynload/cudnn.h b/paddle/fluid/platform/dynload/cudnn.h index fba41417648..dd0a2e19685 100644 --- a/paddle/fluid/platform/dynload/cudnn.h +++ b/paddle/fluid/platform/dynload/cudnn.h @@ -134,6 +134,7 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R3(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) #define CUDNN_DNN_ROUTINE_EACH_AFTER_R3_LESS_R8(__macro) \ __macro(cudnnGetConvolutionBackwardFilterAlgorithm); \ __macro(cudnnGetConvolutionForwardAlgorithm); \ + __macro(cudnnGetConvolutionBackwardDataAlgorithm); \ __macro(cudnnSetRNNDescriptor); CUDNN_DNN_ROUTINE_EACH_AFTER_R3_LESS_R8(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP) #endif -- GitLab From 19a58b3d5d48e07f3d4859a8817c8f9f740ad4cf Mon Sep 17 00:00:00 2001 From: YUNSHEN XIE <1084314248@qq.com> Date: Wed, 23 Sep 2020 19:12:04 +0800 Subject: [PATCH 055/117] disable ut test_vision_models and test_pretrained_model,test=document_fix (#27502) --- python/paddle/tests/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/paddle/tests/CMakeLists.txt b/python/paddle/tests/CMakeLists.txt index 
e1bc65a5d15..6fb73b08c11 100644 --- a/python/paddle/tests/CMakeLists.txt +++ b/python/paddle/tests/CMakeLists.txt @@ -8,6 +8,10 @@ foreach(TEST_OP ${DIST_TEST_OPS}) list(REMOVE_ITEM TEST_OPS ${TEST_OP}) endforeach() +# disable test_pretrained_model and test_vision_models +list(REMOVE_ITEM TEST_OPS test_pretrained_model) +list(REMOVE_ITEM TEST_OPS test_vision_models) + foreach(src ${TEST_OPS}) py_test(${src} SRCS ${src}.py) endforeach() -- GitLab From bb84f0e64612ad4b6899c61aab2d3e97b1177b27 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Wed, 23 Sep 2020 20:12:46 +0800 Subject: [PATCH 056/117] Add new paddle.save/load APIs (#27331) * init commit of new save/load * fix failed unittests * fix save_load_v2 unittest failed * fix failed unittest & polish doc * add tests for coverage * add more tests & move static apis * fix example code error * polish example code * fix detail example code problem --- python/paddle/fluid/dygraph/checkpoint.py | 18 +- python/paddle/fluid/dygraph/jit.py | 27 +- python/paddle/fluid/dygraph/layers.py | 6 +- python/paddle/fluid/dygraph/parallel.py | 6 +- python/paddle/fluid/optimizer.py | 20 +- .../unittests/test_imperative_save_load.py | 18 +- .../unittests/test_imperative_save_load_v2.py | 29 +- .../test_load_state_dict_from_old_format.py | 41 ++- .../tests/unittests/test_paddle_save_load.py | 148 +++++++++ python/paddle/framework/__init__.py | 4 +- python/paddle/framework/io.py | 291 ++++++++++++++++++ python/paddle/io/__init__.py | 8 - python/paddle/static/__init__.py | 13 +- python/paddle/tensor/__init__.py | 2 - python/paddle/tensor/io.py | 19 -- 15 files changed, 539 insertions(+), 111 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/test_paddle_save_load.py create mode 100644 python/paddle/framework/io.py delete mode 100644 python/paddle/tensor/io.py diff --git a/python/paddle/fluid/dygraph/checkpoint.py b/python/paddle/fluid/dygraph/checkpoint.py index 93cb0bafc84..f4ea4d670e6 100644 --- 
a/python/paddle/fluid/dygraph/checkpoint.py +++ b/python/paddle/fluid/dygraph/checkpoint.py @@ -145,7 +145,7 @@ def load_dygraph(model_path, config=None): .. note:: Due to some historical reasons, if you load ``state_dict`` from the saved - result of `paddle.io.save_inference_model`, the structured variable name + result of `paddle.static.save_inference_model`, the structured variable name will cannot be restored. You need to set the argument `use_structured_name=False` when using `Layer.set_state_dict` later. @@ -164,24 +164,24 @@ def load_dygraph(model_path, config=None): .. code-block:: python import paddle - + import paddle.fluid as fluid + paddle.disable_static() - emb = paddle.nn.Embedding([10, 10]) + emb = paddle.nn.Embedding(10, 10) state_dict = emb.state_dict() - paddle.save(state_dict, "paddle_dy") + fluid.save_dygraph(state_dict, "paddle_dy") - scheduler = paddle.optimizer.lr_scheduler.NoamLR( + scheduler = paddle.optimizer.lr_scheduler.NoamLR( d_model=0.01, warmup_steps=100, verbose=True) adam = paddle.optimizer.Adam( learning_rate=scheduler, parameters=emb.parameters()) state_dict = adam.state_dict() - paddle.save(state_dict, "paddle_dy") - - para_state_dict, opti_state_dict = paddle.load("paddle_dy") + fluid.save_dygraph(state_dict, "paddle_dy") + para_state_dict, opti_state_dict = fluid.load_dygraph("paddle_dy") ''' # deal with argument `model_path` model_prefix = model_path @@ -275,7 +275,7 @@ def load_dygraph(model_path, config=None): # If users save all parameters as one file, the [ variable.name -> variable ] # mapping info will lost, so users need to give variable list, but users build # variable list in dygraph mode is difficult, we recommend users to use - # paddle.io.load_program_state in this case + # paddle.static.load_program_state in this case # Try to load all the files in the directory in VarBase format, # the file name is used as the name of VarBase diff --git a/python/paddle/fluid/dygraph/jit.py b/python/paddle/fluid/dygraph/jit.py 
index 10819e4b320..d0e3d23b04b 100644 --- a/python/paddle/fluid/dygraph/jit.py +++ b/python/paddle/fluid/dygraph/jit.py @@ -231,9 +231,7 @@ def declarative(function=None, input_spec=None): class SaveLoadConfig(object): """ The additional configuration options may be used in function - :ref:`api_imperative_jit_save` that save :ref:`api_imperative_TranslatedLayer` - or used in function :ref:`api_imperative_jit_load` that - load :ref:`api_imperative_TranslatedLayer` . + ``paddle.jit.save/load`` and ``paddle.load`` . Examples: 1. Using ``SaveLoadConfig`` when saving model @@ -319,7 +317,7 @@ class SaveLoadConfig(object): @property def output_spec(self): """ - Selects the output targets of the saved model ( :ref:`api_imperative_TranslatedLayer` ). + Selects the output targets of the saved model ( ``paddle.jit.TranslatedLayer`` ). By default, all return variables of original Layer's forward function are kept as the output of the saved TranslatedLayer. @@ -531,11 +529,14 @@ class SaveLoadConfig(object): def separate_params(self): """ Configure whether to save the Layer parameters as separete files. - (In order to be compatible with the behavior of :ref:`api_fluid_io_save_inference_model` ) + (In order to be compatible with the behavior of ``paddle.static.save_inference_model`` ) If True, each parameter will be saved to a file separately, the file name is the parameter name, and the SaveLoadConfig.params_filename configuration will not take effect. Default False. + .. note:: + Only used for ``paddle.jit.save`` . + Examples: .. 
code-block:: python @@ -569,7 +570,7 @@ class SaveLoadConfig(object): adam.clear_grad() model_path = "simplenet.example.model.separate_params" - config = paddle.jit.SaveLoadConfig() + config = paddle.SaveLoadConfig() config.separate_params = True # saving with configs.separate_params @@ -599,12 +600,12 @@ class SaveLoadConfig(object): def keep_name_table(self): """ Configures whether keep ``structured_name -> parameter_name`` dict in loaded state dict. - This dict is the debugging information saved when call `paddle.save`. + This dict is the debugging information saved when call ``paddle.save`` . It is generally only used for debugging and does not affect the actual training or inference. - By default, it will not be retained in `paddle.load` result. Default: False. + By default, it will not be retained in ``paddle.load`` result. Default: False. .. note:: - Only used for ``paddle.load``. + Only used for ``paddle.load`` . Examples: .. code-block:: python @@ -616,11 +617,11 @@ class SaveLoadConfig(object): linear = paddle.nn.Linear(5, 1) state_dict = linear.state_dict() - paddle.save(state_dict, "paddle_dy") + paddle.save(state_dict, "paddle_dy.pdparams") - configs = paddle.SaveLoadConfig() - configs.keep_name_table = True - para_state_dict, _ = paddle.load("paddle_dy", configs) + config = paddle.SaveLoadConfig() + config.keep_name_table = True + para_state_dict = paddle.load("paddle_dy.pdparams", config) print(para_state_dict) # the name_table is 'StructuredToParameterName@@' diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py index 9c79deaab73..88e24e7e1ea 100644 --- a/python/paddle/fluid/dygraph/layers.py +++ b/python/paddle/fluid/dygraph/layers.py @@ -970,12 +970,12 @@ class Layer(core.Layer): paddle.disable_static() - emb = paddle.nn.Embedding([10, 10]) + emb = paddle.nn.Embedding(10, 10) state_dict = emb.state_dict() - paddle.save(state_dict, "paddle_dy") + paddle.save(state_dict, "paddle_dy.pdparams") - para_state_dict, _ 
= paddle.load("paddle_dy") + para_state_dict = paddle.load("paddle_dy.pdparams") emb.set_state_dict(para_state_dict) diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py index 472022bced7..de761cad529 100644 --- a/python/paddle/fluid/dygraph/parallel.py +++ b/python/paddle/fluid/dygraph/parallel.py @@ -610,13 +610,13 @@ class DataParallel(layers.Layer): paddle.disable_static() - emb = paddle.nn.Embedding([10, 10]) + emb = paddle.nn.Embedding(10, 10) emb = fluid.dygraph.DataParallel(emb, strategy) state_dict = emb.state_dict() - paddle.save(state_dict, "paddle_dy") + paddle.save(state_dict, "paddle_dy.pdparams") - para_state_dict, _ = paddle.load("paddle_dy") + para_state_dict = paddle.load("paddle_dy.pdparams") emb.set_state_dict(para_state_dict) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 1e7915ed781..0dd1694c86c 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -182,23 +182,25 @@ class Optimizer(object): Examples: .. 
code-block:: python - import paddle + import paddle + import paddle.fluid as fluid paddle.disable_static() - emb = paddle.nn.Embedding([10, 10]) + emb = paddle.nn.Embedding(10, 10) state_dict = emb.state_dict() - paddle.save(state_dict, "paddle_dy") + fluid.save_dygraph(state_dict, "paddle_dy") - adam = paddle.optimizer.Adam(learning_rate=fluid.layers.noam_decay( 100, 10000), - parameter_list=emb.parameters()) + scheduler = paddle.optimizer.lr_scheduler.NoamLR( + d_model=0.01, warmup_steps=100, verbose=True) + adam = paddle.optimizer.Adam( + learning_rate=scheduler, + parameters=emb.parameters()) state_dict = adam.state_dict() + fluid.save_dygraph(state_dict, "paddle_dy") - para_state_dict, opti_state_dict = paddle.load("paddle_dy") - - adam.set_state_dict(opti_state_dict) - + para_state_dict, opti_state_dict = fluid.load_dygraph("paddle_dy") ''' from paddle.optimizer.lr_scheduler import _LRScheduler if isinstance(self._learning_rate, _LRScheduler): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py index 22e19efcb58..bee53fd10f5 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py @@ -292,7 +292,7 @@ class TestDygraphPtbRnn(unittest.TestCase): np_t = v.numpy() self.model_base[k] = np_t - paddle.save(self.state_dict, "./test_dy") + fluid.save_dygraph(self.state_dict, "./test_dy") def testLoadAndSetVarBase(self): seed = 90 @@ -373,7 +373,7 @@ class TestDygraphPtbRnn(unittest.TestCase): if isinstance(adam._learning_rate, LearningRateDecay): adam._learning_rate.step_num = 0 - para_state_dict, opti_state_dict = paddle.load("./test_dy") + para_state_dict, opti_state_dict = fluid.load_dygraph("./test_dy") adam.set_state_dict(opti_state_dict) opti_dict = adam.state_dict() @@ -898,31 +898,31 @@ class TestDygraphPtbRnn(unittest.TestCase): with fluid.dygraph.guard(): emb = 
fluid.dygraph.Embedding([10, 10]) state_dict = emb.state_dict() - paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy')) + fluid.save_dygraph(state_dict, os.path.join('saved_dy', 'emb_dy')) - para_state_dict, opti_state_dict = paddle.load( + para_state_dict, opti_state_dict = fluid.load_dygraph( os.path.join('saved_dy', 'emb_dy')) self.assertTrue(opti_state_dict == None) - para_state_dict, opti_state_dict = paddle.load( + para_state_dict, opti_state_dict = fluid.load_dygraph( os.path.join('saved_dy', 'emb_dy.pdparams')) - para_state_dict, opti_state_dict = paddle.load( + para_state_dict, opti_state_dict = fluid.load_dygraph( os.path.join('saved_dy', 'emb_dy.pdopt')) def test_load_compatible_with_keep_name_table(self): with fluid.dygraph.guard(): emb = fluid.dygraph.Embedding([10, 10]) state_dict = emb.state_dict() - paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy')) + fluid.save_dygraph(state_dict, os.path.join('saved_dy', 'emb_dy')) - para_state_dict, opti_state_dict = paddle.load( + para_state_dict, opti_state_dict = fluid.load_dygraph( os.path.join('saved_dy', 'emb_dy'), True) self.assertTrue(para_state_dict != None) self.assertTrue(opti_state_dict == None) - para_state_dict, opti_state_dict = paddle.load( + para_state_dict, opti_state_dict = fluid.load_dygraph( os.path.join('saved_dy', 'emb_dy'), keep_name_table=True) self.assertTrue(para_state_dict != None) self.assertTrue(opti_state_dict == None) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py index 3eb413a6266..5b7998198ef 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py @@ -285,7 +285,7 @@ class TestDygraphPtbRnn(unittest.TestCase): else: self.base_opti[k] = v - fluid.save_dygraph(self.opti_dict, "./test_dy_v2") + paddle.save(self.opti_dict, "./test_dy_v2.pdopt") self.state_dict = 
ptb_model.state_dict() @@ -294,7 +294,7 @@ class TestDygraphPtbRnn(unittest.TestCase): np_t = v.numpy() self.model_base[k] = np_t - paddle.save(self.state_dict, "./test_dy_v2") + paddle.save(self.state_dict, "./test_dy_v2.pdparams") def testLoadAndSetVarBase(self): self.setUp() @@ -374,7 +374,8 @@ class TestDygraphPtbRnn(unittest.TestCase): self.assertTrue(np.sum(np.abs(v.numpy())) == 0) - para_state_dict, opti_state_dict = paddle.load("./test_dy_v2") + para_state_dict = paddle.load("./test_dy_v2.pdparams") + opti_state_dict = paddle.load("./test_dy_v2.pdopt") adam.set_state_dict(opti_state_dict) opti_dict = adam.state_dict() @@ -905,26 +906,19 @@ class TestDygraphPtbRnn(unittest.TestCase): with fluid.dygraph.guard(): emb = fluid.dygraph.Embedding([10, 10]) state_dict = emb.state_dict() - paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy')) + paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy.pdparams')) - para_state_dict, opti_state_dict = paddle.load( - os.path.join('saved_dy', 'emb_dy')) - - self.assertTrue(opti_state_dict == None) - - para_state_dict, opti_state_dict = paddle.load( + para_state_dict = paddle.load( os.path.join('saved_dy', 'emb_dy.pdparams')) - para_state_dict, opti_state_dict = paddle.load( - os.path.join('saved_dy', 'emb_dy.pdopt')) - def test_no_state_in_input_dict(self): with fluid.dygraph.guard(): emb = fluid.dygraph.Embedding([10, 10]) state_dict = emb.state_dict() - paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy')) + paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy.pdparams')) - para_state_dict, _ = paddle.load(os.path.join('saved_dy', 'emb_dy')) + para_state_dict = paddle.load( + os.path.join('saved_dy', 'emb_dy.pdparams')) para_state_dict.pop('weight') emb.set_state_dict(para_state_dict) @@ -933,9 +927,10 @@ class TestDygraphPtbRnn(unittest.TestCase): with fluid.dygraph.guard(): emb = fluid.dygraph.Embedding([10, 10]) state_dict = emb.state_dict() - paddle.save(state_dict, os.path.join('saved_dy', 
'emb_dy')) + paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy.pdparams')) - para_state_dict, _ = paddle.load(os.path.join('saved_dy', 'emb_dy')) + para_state_dict = paddle.load( + os.path.join('saved_dy', 'emb_dy.pdparams')) para_state_dict['weight'] = np.expand_dims( para_state_dict['weight'], axis=-1) diff --git a/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py b/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py index a1a9b3f444f..fdc1e6b52ab 100644 --- a/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py +++ b/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py @@ -124,52 +124,67 @@ class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase): self.params_filename = None orig_param_dict = self.train_and_save_model() - load_param_dict, _ = paddle.load(self.save_dirname) + load_param_dict, _ = fluid.load_dygraph(self.save_dirname) self.check_load_state_dict(orig_param_dict, load_param_dict) + new_load_param_dict = paddle.load(self.save_dirname) + self.check_load_state_dict(orig_param_dict, new_load_param_dict) + def test_load_with_model_filename(self): self.save_dirname = "static_mnist.load_state_dict.model_filename" self.model_filename = "static_mnist.model" self.params_filename = None orig_param_dict = self.train_and_save_model() - configs = paddle.SaveLoadConfig() - configs.separate_params = True - configs.model_filename = self.model_filename - load_param_dict, _ = paddle.load(self.save_dirname, configs) + config = paddle.SaveLoadConfig() + config.separate_params = True + config.model_filename = self.model_filename + load_param_dict, _ = fluid.load_dygraph(self.save_dirname, config) self.check_load_state_dict(orig_param_dict, load_param_dict) + new_load_param_dict = paddle.load(self.save_dirname, config) + self.check_load_state_dict(orig_param_dict, new_load_param_dict) + def test_load_with_param_filename(self): self.save_dirname = 
"static_mnist.load_state_dict.param_filename" self.model_filename = None self.params_filename = "static_mnist.params" orig_param_dict = self.train_and_save_model() - configs = paddle.SaveLoadConfig() - configs.params_filename = self.params_filename - load_param_dict, _ = paddle.load(self.save_dirname, configs) + config = paddle.SaveLoadConfig() + config.params_filename = self.params_filename + load_param_dict, _ = fluid.load_dygraph(self.save_dirname, config) self.check_load_state_dict(orig_param_dict, load_param_dict) + new_load_param_dict = paddle.load(self.save_dirname, config) + self.check_load_state_dict(orig_param_dict, new_load_param_dict) + def test_load_with_model_and_param_filename(self): self.save_dirname = "static_mnist.load_state_dict.model_and_param_filename" self.model_filename = "static_mnist.model" self.params_filename = "static_mnist.params" orig_param_dict = self.train_and_save_model() - configs = paddle.SaveLoadConfig() - configs.params_filename = self.params_filename - configs.model_filename = self.model_filename - load_param_dict, _ = paddle.load(self.save_dirname, configs) + config = paddle.SaveLoadConfig() + config.params_filename = self.params_filename + config.model_filename = self.model_filename + load_param_dict, _ = fluid.load_dygraph(self.save_dirname, config) self.check_load_state_dict(orig_param_dict, load_param_dict) + new_load_param_dict = paddle.load(self.save_dirname, config) + self.check_load_state_dict(orig_param_dict, new_load_param_dict) + def test_load_state_dict_from_save_params(self): self.save_dirname = "static_mnist.load_state_dict.save_params" self.params_filename = None orig_param_dict = self.train_and_save_model(True) - load_param_dict, _ = paddle.load(self.save_dirname) + load_param_dict, _ = fluid.load_dygraph(self.save_dirname) self.check_load_state_dict(orig_param_dict, load_param_dict) + new_load_param_dict = paddle.load(self.save_dirname) + self.check_load_state_dict(orig_param_dict, new_load_param_dict) + if 
__name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_paddle_save_load.py b/python/paddle/fluid/tests/unittests/test_paddle_save_load.py new file mode 100644 index 00000000000..74d44d0f8b6 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_paddle_save_load.py @@ -0,0 +1,148 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +import paddle +import paddle.nn as nn +import paddle.optimizer as opt + +BATCH_SIZE = 16 +BATCH_NUM = 4 +EPOCH_NUM = 4 +SEED = 10 + +IMAGE_SIZE = 784 +CLASS_NUM = 10 + + +# define a random dataset +class RandomDataset(paddle.io.Dataset): + def __init__(self, num_samples): + self.num_samples = num_samples + + def __getitem__(self, idx): + np.random.seed(SEED) + image = np.random.random([IMAGE_SIZE]).astype('float32') + label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64') + return image, label + + def __len__(self): + return self.num_samples + + +class LinearNet(nn.Layer): + def __init__(self): + super(LinearNet, self).__init__() + self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM) + + def forward(self, x): + return self._linear(x) + + +def train(layer, loader, loss_fn, opt): + for epoch_id in range(EPOCH_NUM): + for batch_id, (image, label) in enumerate(loader()): + out = layer(image) + loss = loss_fn(out, label) + loss.backward() + opt.step() + 
opt.clear_grad() + + +class TestSaveLoad(unittest.TestCase): + def setUp(self): + # enable dygraph mode + self.place = paddle.CPUPlace() + paddle.disable_static(self.place) + + # config seed + paddle.manual_seed(SEED) + paddle.framework.random._manual_program_seed(SEED) + + def build_and_train_model(self): + # create network + layer = LinearNet() + loss_fn = nn.CrossEntropyLoss() + + adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters()) + + # create data loader + dataset = RandomDataset(BATCH_NUM * BATCH_SIZE) + loader = paddle.io.DataLoader( + dataset, + places=self.place, + batch_size=BATCH_SIZE, + shuffle=True, + drop_last=True, + num_workers=2) + + # train + train(layer, loader, loss_fn, adam) + + return layer, adam + + def check_load_state_dict(self, orig_dict, load_dict): + for var_name, value in orig_dict.items(): + self.assertTrue(np.array_equal(value.numpy(), load_dict[var_name])) + + def test_save_load(self): + layer, opt = self.build_and_train_model() + + # save + layer_save_path = "linear.pdparams" + opt_save_path = "linear.pdopt" + layer_state_dict = layer.state_dict() + opt_state_dict = opt.state_dict() + + paddle.save(layer_state_dict, layer_save_path) + paddle.save(opt_state_dict, opt_save_path) + + # load + load_layer_state_dict = paddle.load(layer_save_path) + load_opt_state_dict = paddle.load(opt_save_path) + + self.check_load_state_dict(layer_state_dict, load_layer_state_dict) + self.check_load_state_dict(opt_state_dict, load_opt_state_dict) + + # test save load in static mode + paddle.enable_static() + static_save_path = "static_mode_test/linear.pdparams" + paddle.save(layer_state_dict, static_save_path) + load_static_state_dict = paddle.load(static_save_path) + self.check_load_state_dict(layer_state_dict, load_static_state_dict) + + # error test cases, some tests relay base test above + # 1. 
test save obj not dict error + test_list = [1, 2, 3] + with self.assertRaises(NotImplementedError): + paddle.save(test_list, "not_dict_error_path") + + # 2. test save path format error + with self.assertRaises(ValueError): + paddle.save(layer_state_dict, "linear.model/") + + # 3. test load path not exist error + with self.assertRaises(ValueError): + paddle.load("linear.params") + + # 4. test load old save path error + with self.assertRaises(ValueError): + paddle.load("linear") + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py index f33e4e0fca8..2ce442add2e 100644 --- a/python/paddle/framework/__init__.py +++ b/python/paddle/framework/__init__.py @@ -48,8 +48,8 @@ from paddle.fluid import core #DEFINE_ALIAS from ..fluid.dygraph.base import no_grad #DEFINE_ALIAS from ..fluid.dygraph.base import to_variable #DEFINE_ALIAS from ..fluid.dygraph.base import grad #DEFINE_ALIAS -from ..fluid.dygraph.checkpoint import load_dygraph as load #DEFINE_ALIAS -from ..fluid.dygraph.checkpoint import save_dygraph as save #DEFINE_ALIAS +from .io import save +from .io import load from ..fluid.dygraph.jit import SaveLoadConfig #DEFINE_ALIAS from ..fluid.dygraph.parallel import DataParallel #DEFINE_ALIAS diff --git a/python/paddle/framework/io.py b/python/paddle/framework/io.py new file mode 100644 index 00000000000..7175f310144 --- /dev/null +++ b/python/paddle/framework/io.py @@ -0,0 +1,291 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import os +import collections +import pickle +import six +import warnings + +import paddle + +# deprecated module import +from paddle import fluid +from paddle.fluid import core +from paddle.fluid.framework import Variable, _varbase_creator, _dygraph_tracer +from paddle.fluid.dygraph.io import _construct_program_holders, _construct_params_and_buffers, EXTRA_VAR_INFO_FILENAME + +__all__ = [ + 'save', + 'load', +] + + +def _build_saved_state_dict(state_dict): + save_dict = {} + name_table = {} + for key, value in state_dict.items(): + if isinstance(value, (Variable, core.VarBase)): + save_dict[key] = value.numpy() + name_table[key] = value.name + else: + save_dict[key] = value + save_dict["StructuredToParameterName@@"] = name_table + + return save_dict + + +def _load_state_dict_from_save_inference_model(model_path, config): + # 1. load program desc & construct _ProgramHolder + programs = _construct_program_holders(model_path, config.model_filename) + + # 2. load layer parameters & buffers + with fluid.dygraph.guard(): + persistable_var_dict = _construct_params_and_buffers( + model_path, + programs, + config.separate_params, + config.params_filename, + append_suffix=False) + + # 3. construct state_dict + load_param_dict = dict() + for var_name in persistable_var_dict: + load_param_dict[var_name] = persistable_var_dict[var_name].numpy() + + # if __variables.info__ exists, we can recover structured_name + var_info_path = os.path.join(model_path, EXTRA_VAR_INFO_FILENAME) + if os.path.exists(var_info_path): + with open(var_info_path, 'rb') as f: + extra_var_info = pickle.load(f) + structured_para_dict = dict() + for var_name in load_param_dict: + structured_name = extra_var_info[var_name].get( + 'structured_name', None) + assert structured_name is not None, "Cannot find saved variable (%s)'s structured name in saved model." 
% var_name + structured_para_dict[structured_name] = load_param_dict[ + var_name] + load_param_dict = structured_para_dict + + return load_param_dict + + +def _load_state_dict_from_save_params(model_path): + # Try to load all the files in the directory in VarBase format, + # the file name is used as the name of VarBase + load_var_list = [] + + # 1. load file names + var_name_list = [] + for root, _, files in os.walk(model_path): + for filename in files: + file_path = os.path.join(root, filename) + tmp_var_name = os.path.relpath(file_path, model_path) + var_name = tmp_var_name.replace("\\", "/") + var_name_list.append(var_name) + + # 2. create and load VarBase + with fluid.dygraph.guard(): + for name in var_name_list: + new_var = _varbase_creator(name=name, persistable=True) + _dygraph_tracer().trace_op( + type='load', + inputs={}, + outputs={'Out': new_var}, + attrs={'file_path': os.path.join(model_path, name)}) + load_var_list.append(new_var) + + # 3. construct state_dict + load_param_dict = dict() + for var in load_var_list: + load_param_dict[var.name] = var.numpy() + + return load_param_dict + + +def save(obj, path): + ''' + Save an object to the specified path. + + .. note:: + Now only supports save ``state_dict`` of Layer or Optimizer. + + Args: + obj(Object) : The object to be saved. + path(str) : The path of the object to be saved. + If saved in the current directory, the input path string will be used as the file name. + + Returns: + None + + Examples: + .. code-block:: python + + import paddle + + paddle.disable_static() + + emb = paddle.nn.Embedding(10, 10) + layer_state_dict = emb.state_dict() + paddle.save(layer_state_dict, "emb.pdparams") + + scheduler = paddle.optimizer.lr_scheduler.NoamLR( + d_model=0.01, warmup_steps=100, verbose=True) + adam = paddle.optimizer.Adam( + learning_rate=scheduler, + parameters=emb.parameters()) + opt_state_dict = adam.state_dict() + paddle.save(opt_state_dict, "adam.pdopt") + ''' + + # 1. 
input check + if not isinstance(obj, dict): + raise NotImplementedError( + "Now only supports save state_dict of Layer or Optimizer, " + "expect dict, but received %s." % type(obj)) + + if len(obj) == 0: + warnings.warn("The input state dict is empty, no need to save.") + + filename = os.path.basename(path) + if filename == "": + raise ValueError("The input path MUST be format of dirname/filename " + "[dirname\\filename in Windows system], but received " + "filename is empty string.") + + # 2. save object + dirname = os.path.dirname(path) + if dirname and not os.path.exists(dirname): + os.makedirs(dirname) + + # TODO(chenweihang): supports save other object + saved_obj = _build_saved_state_dict(obj) + + with open(path, 'wb') as f: + pickle.dump(saved_obj, f, protocol=2) + + +def load(path, config=None): + ''' + Load an object can be used in paddle from specified path. + + .. note:: + Now only supports load ``state_dict`` of Layer or Optimizer. + + .. note:: + ``paddle.load`` supports loading ``state_dict`` from the result of several + paddle1.x save APIs in static mode, but due to some historical reasons, + if you load ``state_dict`` from the saved result of + ``paddle.static.save_inference_model/paddle.fluid.io.save_params/paddle.fluid.io.save_persistables`` , + the structured variable name will cannot be restored. You need to set the argument + ``use_structured_name=False`` when using ``Layer.set_state_dict`` later. + + Args: + path(str) : The path to load the target object. Generally, the path is the target + file path, when compatible with loading the saved results of + ``paddle.jit.save/paddle.static.save_inference_model`` , the path is a directory. + config (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` + object that specifies additional configuration options, these options + are for compatibility with ``paddle.jit.save/paddle.static.save_inference_model`` + formats. Default None. 
+ + Returns: + Object(Object): a target object can be used in paddle + + Examples: + .. code-block:: python + + import paddle + + paddle.disable_static() + + emb = paddle.nn.Embedding(10, 10) + layer_state_dict = emb.state_dict() + paddle.save(layer_state_dict, "emb.pdparams") + + scheduler = paddle.optimizer.lr_scheduler.NoamLR( + d_model=0.01, warmup_steps=100, verbose=True) + adam = paddle.optimizer.Adam( + learning_rate=scheduler, + parameters=emb.parameters()) + opt_state_dict = adam.state_dict() + paddle.save(opt_state_dict, "adam.pdopt") + + load_layer_state_dict = paddle.load("emb.pdparams") + load_opt_state_dict = paddle.load("adam.pdopt") + ''' + # 1. input check + if not os.path.exists(path): + error_msg = "The path `%s` does not exist." + # if current path is a prefix, and the path.pdparams or path.pdopt + # is exist, users may want use `paddle.load` load the result of + # `fluid.save_dygraph`, we raise error here for users + params_file_path = path + ".pdparams" + opti_file_path = path + ".pdopt" + if os.path.exists(params_file_path) or os.path.exists(opti_file_path): + error_msg += " If you want to load the results saved by `fluid.save_dygraph`, " \ + "please specify the full file name, not just the file name prefix. For " \ + "example, it should be written as `paddle.load('model.pdparams')` instead of " \ + "`paddle.load('model')`." + raise ValueError(error_msg % path) + + if config is None: + config = paddle.SaveLoadConfig() + + # 2. 
load target + load_result = None + if os.path.isfile(path): + # we think path is file means this file is created by paddle.save + with open(path, 'rb') as f: + load_result = pickle.load(f) if six.PY2 else pickle.load( + f, encoding='latin1') + + if not config.keep_name_table and "StructuredToParameterName@@" in load_result: + del load_result["StructuredToParameterName@@"] + elif os.path.isdir(path): + # we think path is directory means compatible with loading + # store results of static mode related save APIs + + # check whether model file exists + if config.model_filename is None: + model_filename = '__model__' + else: + model_filename = config.model_filename + model_file_path = os.path.join(path, model_filename) + + if os.path.exists(model_file_path): + # Load state dict by `jit.save/io.save_inference_model` save format + # NOTE(chenweihang): [ Compatibility of save_inference_model save format ] + # The model saved by `save_inference_model` does not completely correspond to + # the information required by the `state_dict` under the dygraph. 
+ # `save_inference_model` not save structured name, we need to remind + # the user to configure the `use_structured_name` argument when `set_state_dict` + # NOTE(chenweihang): `jit.save` doesn't save optimizer state + load_result = _load_state_dict_from_save_inference_model(path, + config) + else: + # load state dict by `io.save_params/persistables` save format + # TODO(chenweihang): [ Now only supports loading parameters seperately ] + # If users save all parameters as one file, the [ variable.name -> variable ] + # mapping info will lost, so users need to give variable list, but users build + # variable list in dygraph mode is difficult, we recommend users to use + # paddle.static.load_program_state in this case + load_result = _load_state_dict_from_save_params(path) + else: + raise ValueError( + "Unsupported path format, now only supports file or directory.") + + return load_result diff --git a/python/paddle/io/__init__.py b/python/paddle/io/__init__.py index 6f0b0f3c9c1..92dd819b3cd 100644 --- a/python/paddle/io/__init__.py +++ b/python/paddle/io/__init__.py @@ -25,16 +25,8 @@ __all__ = [ 'Sampler', 'SequenceSampler', 'RandomSampler', - 'load', - 'save', - 'load_program_state', - 'set_program_state', - 'load_inference_model', - 'save_inference_model', ] from ..fluid.io import DataLoader from ..fluid.dataloader import Dataset, IterableDataset, BatchSampler, get_worker_info, \ TensorDataset, Sampler, SequenceSampler, RandomSampler, DistributedBatchSampler -from ..fluid.io import load, save, load_program_state, set_program_state, \ - load_inference_model, save_inference_model, batch diff --git a/python/paddle/static/__init__.py b/python/paddle/static/__init__.py index 42a28a4f04e..e0a9bc6eec3 100644 --- a/python/paddle/static/__init__.py +++ b/python/paddle/static/__init__.py @@ -17,8 +17,9 @@ __all__ = [ 'append_backward', 'gradients', 'Executor', 'global_scope', 'scope_guard', 'BuildStrategy', 'CompiledProgram', 'Print', 'py_func', 'ExecutionStrategy', 
'name_scope', 'ParallelExecutor', 'program_guard', 'WeightNormParamAttr', - 'default_main_program', 'default_startup_program', 'Program', 'save', - 'load', 'data', 'InputSpec' + 'default_main_program', 'default_startup_program', 'Program', 'data', + 'InputSpec', 'save', 'load', 'save_inference_model', 'load_inference_model', + 'load_program_state', 'set_program_state' ] from . import nn @@ -41,5 +42,9 @@ from ..fluid.layers.control_flow import Print #DEFINE_ALIAS from ..fluid.layers.nn import py_func #DEFINE_ALIAS from ..fluid.parallel_executor import ParallelExecutor #DEFINE_ALIAS from ..fluid.param_attr import WeightNormParamAttr #DEFINE_ALIAS -from ..tensor.io import save #DEFINE_ALIAS -from ..tensor.io import load #DEFINE_ALIAS +from ..fluid.io import save #DEFINE_ALIAS +from ..fluid.io import load #DEFINE_ALIAS +from ..fluid.io import save_inference_model #DEFINE_ALIAS +from ..fluid.io import load_inference_model #DEFINE_ALIAS +from ..fluid.io import load_program_state #DEFINE_ALIAS +from ..fluid.io import set_program_state #DEFINE_ALIAS diff --git a/python/paddle/tensor/__init__.py b/python/paddle/tensor/__init__.py index cec989fba8b..b6bab16c968 100755 --- a/python/paddle/tensor/__init__.py +++ b/python/paddle/tensor/__init__.py @@ -42,8 +42,6 @@ from .creation import tril #DEFINE_ALIAS from .creation import meshgrid #DEFINE_ALIAS from .creation import empty #DEFINE_ALIAS from .creation import empty_like #DEFINE_ALIAS -from .io import save #DEFINE_ALIAS -from .io import load #DEFINE_ALIAS from .linalg import matmul #DEFINE_ALIAS from .linalg import dot #DEFINE_ALIAS # from .linalg import einsum #DEFINE_ALIAS diff --git a/python/paddle/tensor/io.py b/python/paddle/tensor/io.py deleted file mode 100644 index 66e956e8e4b..00000000000 --- a/python/paddle/tensor/io.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# TODO: define functions to save & load a tensor -from ..fluid import save #DEFINE_ALIAS -from ..fluid.io import load #DEFINE_ALIAS - -__all__ = ['save', 'load'] -- GitLab From 66951ab2eaf345422e91ba448fba7755834d3b38 Mon Sep 17 00:00:00 2001 From: YUNSHEN XIE <1084314248@qq.com> Date: Wed, 23 Sep 2020 20:39:24 +0800 Subject: [PATCH 057/117] modified timeout value for 4 ut (#27462) --- paddle/fluid/inference/tests/api/CMakeLists.txt | 7 +++++++ python/paddle/fluid/contrib/slim/tests/CMakeLists.txt | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 146d5932577..28211d0ce08 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -198,6 +198,9 @@ inference_analysis_test(test_analyzer_ernie_large SRCS analyzer_ernie_tester.cc if(NOT WIN32 AND NOT APPLE) set_tests_properties(test_analyzer_ernie_large PROPERTIES TIMEOUT 150 LABELS "RUN_TYPE=NIGHTLY") endif() +if (WIN32) + set_tests_properties(test_analyzer_ernie_large PROPERTIES TIMEOUT 200) +endif() # text_classification set(TEXT_CLASSIFICATION_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/text_classification") @@ -258,6 +261,10 @@ set(RESNET50_MODEL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/resnet50") download_data(${RESNET50_MODEL_DIR} "resnet50_model.tar.gz") 
inference_analysis_api_test_with_fake_data_run(test_analyzer_resnet50 ${IMG_CLASS_TEST_APP} ${RESNET50_MODEL_DIR} true) +if (WIN32) + set_tests_properties(test_analyzer_resnet50 PROPERTIES TIMEOUT 200) +endif() + # mobilenet with depthwise_conv op set(MOBILENET_MODEL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet_depthwise_conv") diff --git a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt index 6ac005060e0..dd4bea06572 100644 --- a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt +++ b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt @@ -270,7 +270,7 @@ foreach(src ${TEST_OPS}) endforeach() # setting timeout value for old unittests -if(NOT WIN32 AND NOT APPLE) +if(NOT WIN32) set_tests_properties(test_post_training_quantization_mobilenetv1 PROPERTIES TIMEOUT 250 LABELS "RUN_TYPE=NIGHTLY") set_tests_properties(test_post_training_quantization_resnet50 PROPERTIES TIMEOUT 200 LABELS "RUN_TYPE=NIGHTLY") endif() -- GitLab From c0caf0e45fac2bbbae184a702a4fe0313a9c3c69 Mon Sep 17 00:00:00 2001 From: tangwei12 Date: Wed, 23 Sep 2020 20:44:15 +0800 Subject: [PATCH 058/117] fix ut for static graph (#27506) --- python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py | 3 +++ .../tests/unittests/test_dist_lookup_sparse_table_fuse_ops.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py index d5b1284e3ce..c09f22f3fc5 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py @@ -15,11 +15,14 @@ from __future__ import print_function import unittest +import paddle import paddle.fluid as fluid import paddle.fluid.incubate.fleet.base.role_maker as role_maker from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet from 
paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory +paddle.enable_static() + # For Net base_lr = 0.2 emb_lr = base_lr * 3 diff --git a/python/paddle/fluid/tests/unittests/test_dist_lookup_sparse_table_fuse_ops.py b/python/paddle/fluid/tests/unittests/test_dist_lookup_sparse_table_fuse_ops.py index bca91c536ba..ee099e48eff 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_lookup_sparse_table_fuse_ops.py +++ b/python/paddle/fluid/tests/unittests/test_dist_lookup_sparse_table_fuse_ops.py @@ -20,6 +20,9 @@ import numpy as np import paddle.fluid as fluid import paddle.fluid.core as core +import paddle +paddle.enable_static() + class TestLookupTableFuseOp(unittest.TestCase): def test_fuse(self): -- GitLab From 4a9d21de4987ced5aaf58a318ac598abff853b48 Mon Sep 17 00:00:00 2001 From: Zhong Hui Date: Thu, 24 Sep 2020 10:11:06 +0800 Subject: [PATCH 059/117] Add GPU Kernels of Segment Ops, support, sum, max, min, mean Add GPU Kernels of Segment Ops, support, sum, max, min, mean --- .../fluid/operators/math/segment_pooling.cu | 365 ++++++++++++++++++ paddle/fluid/operators/segment_pool_op.cu | 28 ++ paddle/fluid/operators/segment_pool_op.h | 40 ++ paddle/fluid/platform/cuda_primitives.h | 107 +++++ 4 files changed, 540 insertions(+) create mode 100644 paddle/fluid/operators/math/segment_pooling.cu create mode 100644 paddle/fluid/operators/segment_pool_op.cu diff --git a/paddle/fluid/operators/math/segment_pooling.cu b/paddle/fluid/operators/math/segment_pooling.cu new file mode 100644 index 00000000000..bb2b6db100b --- /dev/null +++ b/paddle/fluid/operators/math/segment_pooling.cu @@ -0,0 +1,365 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/elementwise/elementwise_div_op.h" +#include "paddle/fluid/operators/gather.cu.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/operators/math/segment_pooling.h" +#include "paddle/fluid/platform/cuda_primitives.h" +#include "paddle/fluid/platform/gpu_launch_param_config.h" +#include "paddle/fluid/platform/macros.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +__global__ void SegmentMeanCustomKernel( + const Index* segment_ids, const T* input, T* output, T* summed_ids, + const Index input_length_size, const Index inner_dim_size, + const Index output_length_size, const Index total_stripe_count) { + CUDA_KERNEL_LOOP(stripe_index, total_stripe_count) { + const Index segment_offset = stripe_index % inner_dim_size; + const Index dim_index_base = + stripe_index / inner_dim_size * Index(DimTileSize); + const Index actual_height = + min(Index(DimTileSize), input_length_size - dim_index_base); + + Index first_segment_id = segment_ids[dim_index_base]; + Index last_segment_id = -1; + if (dim_index_base > 0) { + last_segment_id = segment_ids[dim_index_base - 1]; + } + if (segment_offset == 0) { + T sum = T(0); + for (Index j = 0; j < actual_height; j++) { + Index current_segment_id = segment_ids[dim_index_base + j]; + // Note(ZHUI): following check may cause + // cudaErrorLaunchOutOfResources. 
+ // PADDLE_ENFORCE(current_segment_id >= last_segment_id, + // "the segment ids should be sorted, but got " + // "segment_ids[%d]:%d > segment_ids[%d]:%d.", + // dim_index_base + j - 1, dim_index_base + j, + // last_segment_id, current_segment_id); + + if (j > 0 && current_segment_id > last_segment_id) { + if (last_segment_id == first_segment_id) { + platform::CudaAtomicAdd(summed_ids + last_segment_id, sum); + } else { + *(summed_ids + last_segment_id) = sum; + } + sum = T(0); + } + sum += T(1); + last_segment_id = current_segment_id; + } + platform::CudaAtomicAdd(summed_ids + last_segment_id, sum); + } + // ensure last_segment_id is the largest + last_segment_id = output_length_size; + __syncthreads(); + T sum = T(0); + for (Index j = 0; j < actual_height; j++) { + Index current_segment_id = segment_ids[dim_index_base + j]; + if (current_segment_id > last_segment_id) { + const Index output_index = + last_segment_id * inner_dim_size + segment_offset; + if (last_segment_id == first_segment_id) { + platform::CudaAtomicAdd(output + output_index, + sum / *(summed_ids + last_segment_id)); + } else { + *(output + output_index) = sum / *(summed_ids + last_segment_id); + } + sum = T(0); + } + sum += input[(dim_index_base + j) * inner_dim_size + segment_offset]; + last_segment_id = current_segment_id; + } + const Index output_index = + last_segment_id * inner_dim_size + segment_offset; + platform::CudaAtomicAdd(output + output_index, + sum / *(summed_ids + last_segment_id)); + } +} + +template +__global__ void SegmentOpsKernel(const Index* segment_ids, const T* input, + T* output, Helper h, Pool pool) { + CUDA_KERNEL_LOOP(stripe_index, h.total_stripe_count) { + Index segment_offset, dim_index_base, actual_height; + Index inner_dim_size = h.inner_dim_size; + h.calculate(stripe_index, segment_offset, dim_index_base, actual_height); + + T minmax = pool.initial(); + Index first_segment_id = segment_ids[dim_index_base]; + // -1 is for the start value when interval_id = 0 + 
Index last_segment_id = -1; + if (dim_index_base > 0) { + last_segment_id = segment_ids[dim_index_base - 1]; + } + + for (Index j = 0; j < actual_height; j++) { + Index current_segment_id = segment_ids[dim_index_base + j]; + // ensure the segment_ids is sorted. + PADDLE_ENFORCE(current_segment_id >= last_segment_id, + "The segment ids should be sorted, but got " + "segment_ids[%d]:%d > segment_ids[%d]:%d.", + dim_index_base + j - 1, dim_index_base + j, + last_segment_id, current_segment_id); + + if (current_segment_id > last_segment_id) { + // reset the interval value which do not have corresponding ids. + for (Index interval_id = last_segment_id + 1; + interval_id < current_segment_id; ++interval_id) { + *(output + interval_id * inner_dim_size + segment_offset) = 0; + } + // don't update result when j=0 + if (j > 0) { + const Index output_index = + last_segment_id * inner_dim_size + segment_offset; + if (last_segment_id == first_segment_id) { + pool.atomic(output + output_index, minmax); + } else { + *(output + output_index) = minmax; + } + minmax = pool.initial(); + } + } + pool.compute( + input[(dim_index_base + j) * inner_dim_size + segment_offset], + &minmax); + last_segment_id = current_segment_id; + } + const Index output_index = + last_segment_id * inner_dim_size + segment_offset; + pool.atomic(output + output_index, minmax); + } +} + +template +__global__ void SegmentIndexGradKernel(const Index* segment_ids, const T* input, + const T* output, const T* out_grad, + T* in_grad, Helper h) { + CUDA_KERNEL_LOOP(stripe_index, h.total_stripe_count) { + Index segment_offset, dim_index_base, actual_height; + h.calculate(stripe_index, segment_offset, dim_index_base, actual_height); + + for (Index j = 0; j < actual_height; j++) { + Index current_segment_id = segment_ids[dim_index_base + j]; + Index input_index = + (dim_index_base + j) * h.inner_dim_size + segment_offset; + Index output_index = + current_segment_id * h.inner_dim_size + segment_offset; + if 
(input[input_index] == output[output_index]) { + in_grad[input_index] = out_grad[output_index]; + } + } + } +} + +template +class MaxPool { + public: + DEVICE inline T initial() { return static_cast(-FLT_MAX); } + DEVICE inline void compute(const T& x, T* y) { *y = *y > x ? *y : x; } + DEVICE inline T atomic(T* address, const T val) { + return platform::CudaAtomicMax(address, val); + } +}; + +template +class MinPool { + public: + DEVICE inline T initial() { return static_cast(FLT_MAX); } + DEVICE inline void compute(const T& x, T* y) { *y = *y < x ? *y : x; } + DEVICE inline T atomic(T* address, const T val) { + return platform::CudaAtomicMin(address, val); + } +}; + +template +class SumPool { + public: + DEVICE inline T initial() { return static_cast(0); } + DEVICE inline void compute(const T& x, T* y) { *y = *y + x; } + DEVICE inline T atomic(T* address, const T val) { + return platform::CudaAtomicAdd(address, val); + } +}; + +template +class ArrangeHelper { + public: + const T input_total_size; + const T input_length_size; + const T output_length_size; + T inner_dim_size; + T total_stripe_count; + const T DimTileSize = 8; + + ArrangeHelper(T a, T b, T c) + : input_total_size(a), input_length_size(b), output_length_size(c) { + T input_outer_dim_num_stripe = + (input_length_size + DimTileSize - 1) / DimTileSize; + inner_dim_size = input_total_size / input_length_size; + total_stripe_count = inner_dim_size * input_outer_dim_num_stripe; + } + + DEVICE inline void calculate(T stripe_index, T& segment_offset, + T& dim_index_base, T& actual_height) { + segment_offset = stripe_index % inner_dim_size; + dim_index_base = stripe_index / inner_dim_size * DimTileSize; + actual_height = min(DimTileSize, input_length_size - dim_index_base); + } +}; + +template +void SegmentPoolCUDAGradFunctor(const platform::CUDADeviceContext& ctx, + const framework::Tensor& input, + const framework::Tensor& segment_ids, + const framework::Tensor& output, + const framework::Tensor& out_grad, + 
framework::Tensor* in_grad, + const std::string pooltype = "SUM") { + auto h = ArrangeHelper(input.numel(), segment_ids.dims()[0], + output.dims()[0]); + auto config = platform::GetGpuLaunchConfig1D(ctx, h.total_stripe_count); + if (pooltype == "MAX" || pooltype == "MIN") { + SegmentIndexGradKernel><<< + config.block_per_grid.x, config.thread_per_block.x, 0, ctx.stream()>>>( + segment_ids.data(), input.data(), output.data(), + out_grad.data(), in_grad->data(), h); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "Unsupported segment pooling grad operation, Only MAX, MIN " + "available, but got %s.", + pooltype)); + } +} + +template +__global__ void SimpleDiv(T* x, const T* y, const int len, const int dim) { + for (int i = blockIdx.x; i < len; i += gridDim.x) { + __shared__ T y_i; + auto base = i * dim; + if (threadIdx.x == 0) { + y_i = y[i]; + } + __syncthreads(); + for (int j = threadIdx.x; j < dim; j += blockDim.x) { + x[base + j] /= y_i; + } + } +} + +template +class SegmentPoolFunctor { + public: + void operator()(const platform::CUDADeviceContext& ctx, + const framework::Tensor& input, + const framework::Tensor& segment_ids, + framework::Tensor* output, + framework::Tensor* summed_ids = nullptr, + const std::string pooltype = "SUM") { + auto h = ArrangeHelper(input.numel(), segment_ids.dims()[0], + output->dims()[0]); + auto config = platform::GetGpuLaunchConfig1D(ctx, h.total_stripe_count); + if (pooltype == "MEAN") { + SegmentMeanCustomKernel< + T, IndexT, IndexT(8)><<>>( + segment_ids.data(), input.data(), output->data(), + summed_ids->data(), h.input_length_size, h.inner_dim_size, + h.output_length_size, h.total_stripe_count); + } else if (pooltype == "SUM") { + SumPool pool; + SegmentOpsKernel< + T, IndexT, ArrangeHelper, + SumPool><<>>(segment_ids.data(), + input.data(), output->data(), h, + pool); + } else if (pooltype == "MAX") { + MaxPool pool; + SegmentOpsKernel< + T, IndexT, ArrangeHelper, + MaxPool><<>>(segment_ids.data(), + 
input.data(), output->data(), h, + pool); + } else if (pooltype == "MIN") { + MinPool pool; + SegmentOpsKernel< + T, IndexT, ArrangeHelper, + MinPool><<>>(segment_ids.data(), + input.data(), output->data(), h, + pool); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "Unsupported segment pooling operation, Only MEAN, SUM, MAX, MIN " + "available, but got %s.", + pooltype)); + } + } +}; + +template +class SegmentPoolGradFunctor { + public: + void operator()(const platform::CUDADeviceContext& context, + const framework::Tensor& input, + const framework::Tensor& output, + const framework::Tensor& out_grad, + const framework::Tensor& segments, framework::Tensor* in_grad, + const framework::Tensor* summed_ids = nullptr, + const std::string pooltype = "SUM") { + if (pooltype == "MAX" || pooltype == "MIN") { + SegmentPoolCUDAGradFunctor(context, input, segments, output, + out_grad, in_grad, pooltype); + } else if (pooltype == "MEAN") { + framework::Tensor mean_grad; + mean_grad.mutable_data(input.dims(), context.GetPlace()); + framework::TensorCopy(out_grad, context.GetPlace(), context, &mean_grad); + int len = output.dims()[0]; + int dim = output.numel() / len; + auto config = platform::GetGpuLaunchConfig1D(context, len); + SimpleDiv<<>>(mean_grad.data(), + summed_ids->data(), len, dim); + GPUGather(context, mean_grad, segments, in_grad); + } else if (pooltype == "SUM") { + GPUGather(context, out_grad, segments, in_grad); + } else { + PADDLE_THROW(platform::errors::InvalidArgument( + "Unsupported segment pooling operation, Only MEAN, SUM, MAX, MIN " + "available, but got %s.", + pooltype)); + } + } +}; + +using CUDA = paddle::platform::CUDADeviceContext; +template class SegmentPoolFunctor; +template class SegmentPoolFunctor; +template class SegmentPoolFunctor; +template class SegmentPoolFunctor; +template class SegmentPoolGradFunctor; +template class SegmentPoolGradFunctor; +template class SegmentPoolGradFunctor; +template class SegmentPoolGradFunctor; + +} 
// namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/segment_pool_op.cu b/paddle/fluid/operators/segment_pool_op.cu new file mode 100644 index 00000000000..dc92d7fcc3a --- /dev/null +++ b/paddle/fluid/operators/segment_pool_op.cu @@ -0,0 +1,28 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/gather.cu.h" +#include "paddle/fluid/operators/segment_pool_op.h" +#include "paddle/fluid/platform/cuda_primitives.h" +#include "paddle/fluid/platform/gpu_launch_param_config.h" + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL( + segment_pool, + ops::SegmentPoolKernel, + ops::SegmentPoolKernel); +REGISTER_OP_CUDA_KERNEL( + segment_pool_grad, + ops::SegmentPoolGradKernel, + ops::SegmentPoolGradKernel); diff --git a/paddle/fluid/operators/segment_pool_op.h b/paddle/fluid/operators/segment_pool_op.h index a505946b9f5..23b0c31608d 100644 --- a/paddle/fluid/operators/segment_pool_op.h +++ b/paddle/fluid/operators/segment_pool_op.h @@ -63,6 +63,46 @@ void SegmentKernelLaunchHelper(const framework::ExecutionContext& context) { auto& dev_ctx = context.template device_context(); set_zero(dev_ctx, output, static_cast(0)); } +#ifdef PADDLE_WITH_CUDA + if (!cpu_place) { + Tensor length; + length.mutable_data(framework::make_ddim({1}), + platform::CPUPlace()); + IndexT* length_data = length.data(); + const IndexT* segment_ids = segment->data(); + + 
PADDLE_ENFORCE_CUDA_SUCCESS( + cudaMemcpy(length_data, segment_ids + num_indices - 1, sizeof(IndexT), + cudaMemcpyDeviceToHost)); + + IndexT length_host = length_data[0]; + length_host++; + PADDLE_ENFORCE_GT( + length_host, 0, + platform::errors::InvalidArgument( + "Segment ids must be >= 0, but got last id %d", length_data[0])); + auto dims = input->dims(); + dims[0] = static_cast(length_host); + output->Resize({dims}); + output->mutable_data(context.GetPlace()); + T init_value = 0; + if (pooltype == "MAX") { + init_value = static_cast(-FLT_MAX); + } else if (pooltype == "MIN") { + init_value = static_cast(FLT_MAX); + } + math::SetConstant setconst; + auto& dev_ctx = context.template device_context(); + setconst(dev_ctx, output, static_cast(init_value)); + // the gpu kernel of mean pool record the counts of segment_ids + if (pooltype == "MEAN") { + summed_ids = context.Output("SummedIds"); + summed_ids->Resize({dims[0], 1}); + summed_ids->mutable_data(context.GetPlace()); + setconst(dev_ctx, summed_ids, static_cast(1e-12)); + } + } +#endif SegmentPoolFunctor pool; diff --git a/paddle/fluid/platform/cuda_primitives.h b/paddle/fluid/platform/cuda_primitives.h index 67ea64833d3..f7c77071b12 100644 --- a/paddle/fluid/platform/cuda_primitives.h +++ b/paddle/fluid/platform/cuda_primitives.h @@ -128,5 +128,112 @@ CUDA_ATOMIC_WRAPPER(Add, float16) { } #endif + +// For atomicMax +USE_CUDA_ATOMIC(Max, int); +USE_CUDA_ATOMIC(Max, unsigned int); +// CUDA API uses unsigned long long int, we cannot use uint64_t here. +// It because unsigned long long int is not necessarily uint64_t +USE_CUDA_ATOMIC(Max, unsigned long long int); // NOLINT + +CUDA_ATOMIC_WRAPPER(Max, int64_t) { + // Here, we check long long int must be int64_t. 
+ static_assert(sizeof(int64_t) == sizeof(long long int), // NOLINT + "long long should be int64"); + return CudaAtomicMax( + reinterpret_cast(address), // NOLINT + static_cast(val)); // NOLINT +} + +CUDA_ATOMIC_WRAPPER(Max, float) { + if (*address >= val) { + return; + } + + int *const address_as_i = (int *)address; + int old = *address_as_i, assumed; + + do { + assumed = old; + if (__int_as_float(assumed) >= val) { + break; + } + + old = atomicCAS(address_as_i, assumed, __float_as_int(val)); + } while (assumed != old); +} + +CUDA_ATOMIC_WRAPPER(Max, double) { + if (*address >= val) { + return; + } + + unsigned long long int *const address_as_ull = + (unsigned long long int *)address; + unsigned long long int old = *address_as_ull, assumed; + + do { + assumed = old; + if (__longlong_as_double(assumed) >= val) { + break; + } + + old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val)); + } while (assumed != old); +} + +// For atomicMin +USE_CUDA_ATOMIC(Min, int); +USE_CUDA_ATOMIC(Min, unsigned int); +// CUDA API uses unsigned long long int, we cannot use uint64_t here. +// It because unsigned long long int is not necessarily uint64_t +USE_CUDA_ATOMIC(Min, unsigned long long int); // NOLINT + +CUDA_ATOMIC_WRAPPER(Min, int64_t) { + // Here, we check long long int must be int64_t. 
+ static_assert(sizeof(int64_t) == sizeof(long long int), // NOLINT + "long long should be int64"); + return CudaAtomicMin( + reinterpret_cast(address), // NOLINT + static_cast(val)); // NOLINT +} + +CUDA_ATOMIC_WRAPPER(Min, float) { + if (*address <= val) { + return; + } + + int *const address_as_i = (int *)address; + int old = *address_as_i, assumed; + + do { + assumed = old; + if (__int_as_float(assumed) <= val) { + break; + } + + old = atomicCAS(address_as_i, assumed, __float_as_int(val)); + } while (assumed != old); +} + +CUDA_ATOMIC_WRAPPER(Min, double) { + if (*address <= val) { + return; + } + + unsigned long long int *const address_as_ull = + (unsigned long long int *)address; + unsigned long long int old = *address_as_ull, assumed; + + do { + assumed = old; + if (__longlong_as_double(assumed) <= val) { + break; + } + + old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val)); + } while (assumed != old); +} + } // namespace platform } // namespace paddle -- GitLab From 5c8fdb59265e7e22a4bd52629e0038180d494ff5 Mon Sep 17 00:00:00 2001 From: Zhou Wei <52485244+zhouwei25@users.noreply.github.com> Date: Thu, 24 Sep 2020 10:21:44 +0800 Subject: [PATCH 060/117] Fix GradientClipByGlobalNorm dtype bug (#27437) * fix dtype of gradientclipbyglobalnorm * fix dtype bug of GradientClipbyGlobalnorm --- python/paddle/fluid/clip.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index 7b301ac19d1..04e4906868e 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -590,7 +590,7 @@ class GradientClipByGlobalNorm(GradientClipBase): global_norm_var = layers.reduce_sum(global_norm_var) global_norm_var = layers.sqrt(global_norm_var) max_global_norm = layers.fill_constant( - shape=[1], dtype='float32', value=self.clip_norm) + shape=[1], dtype=global_norm_var.dtype, value=self.clip_norm) clip_var = layers.elementwise_div( x=max_global_norm, 
y=layers.elementwise_max( @@ -635,7 +635,9 @@ class GradientClipByGlobalNorm(GradientClipBase): global_norm_var = layers.sums(sum_square_list) global_norm_var = layers.sqrt(x=global_norm_var) max_global_norm = layers.fill_constant( - shape=[1], dtype="float32", value=self.clip_norm) + shape=[1], + dtype=global_norm_var.dtype, + value=self.clip_norm) scale_var = layers.elementwise_div( x=max_global_norm, y=layers.elementwise_max( @@ -663,7 +665,7 @@ class GradientClipByGlobalNorm(GradientClipBase): context[self.group_name] = [] context[self.group_name + "_clip_value"] = self.clip_norm context[self.group_name + "_clip"] = layers.fill_constant( - shape=[1], dtype="float32", value=self.clip_norm) + shape=[1], dtype=grad.dtype, value=self.clip_norm) else: if not self.clip_norm == context[self.group_name + "_clip_value"]: raise ValueError( -- GitLab From dc713116e01898986e02f6f30f8279d343bfd957 Mon Sep 17 00:00:00 2001 From: wangchaochaohu Date: Wed, 23 Sep 2020 19:32:38 -0700 Subject: [PATCH 061/117] refine the error message for bath size like OP (#27446) * refine the error message for bath size like --- paddle/fluid/operators/batch_size_like.h | 40 ++++++++++++++++++------ 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/paddle/fluid/operators/batch_size_like.h b/paddle/fluid/operators/batch_size_like.h index d2cf3804930..f24a3c316a0 100644 --- a/paddle/fluid/operators/batch_size_like.h +++ b/paddle/fluid/operators/batch_size_like.h @@ -26,25 +26,47 @@ class BatchSizeLikeOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Input"), - "Input(Input) of %s should not be null.", Type()); - PADDLE_ENFORCE(ctx->HasOutput("Out"), - "Output(Out) of %s should not be null.", Type()); + OP_INOUT_CHECK(ctx->HasInput("Input"), "Input", "Input", Type()); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", Type()); auto 
&shape = ctx->Attrs().Get>("shape"); - PADDLE_ENFORCE_GT(shape.size(), 0); + PADDLE_ENFORCE_GT(shape.size(), 0, + platform::errors::InvalidArgument( + "Shape size must be larger than 0, but received: %s.", + shape.size())); std::vector shape_int64(shape.size(), 0); std::transform(shape.begin(), shape.end(), shape_int64.begin(), [](int a) { return static_cast(a); }); auto output_dim = framework::make_ddim(shape_int64); int input_dim_idx = ctx->Attrs().Get("input_dim_idx"); - PADDLE_ENFORCE_GE(input_dim_idx, 0); - PADDLE_ENFORCE_GT(ctx->GetInputDim("Input").size(), input_dim_idx); + int input_dim_size = static_cast(ctx->GetInputDim("Input").size()); + PADDLE_ENFORCE_GE(input_dim_idx, 0, + platform::errors::InvalidArgument( + "Input dimension index must be larger " + "equal than 0, but received: %s.", + input_dim_idx)); + PADDLE_ENFORCE_GT(input_dim_size, input_dim_idx, + platform::errors::InvalidArgument( + "Input dimension size must be larger than " + "input dimension index, but received input " + "dimension size: %s, input dimension index: %s.", + input_dim_size, input_dim_idx)); int output_dim_idx = ctx->Attrs().Get("output_dim_idx"); - PADDLE_ENFORCE_GE(output_dim_idx, 0); - PADDLE_ENFORCE_GT(static_cast(shape.size()), output_dim_idx); + int output_dim_size = static_cast(shape.size()); + PADDLE_ENFORCE_GE(output_dim_idx, 0, + platform::errors::InvalidArgument( + "Output dimension index must be larger " + "equal than 0, but received: %s.", + output_dim_idx)); + PADDLE_ENFORCE_GT( + output_dim_size, output_dim_idx, + platform::errors::InvalidArgument( + "Output dimension size must be larger than output dimension index, " + "but received output dimension size: %s, output dimension index: " + "%s.", + output_dim_size, output_dim_idx)); output_dim[output_dim_idx] = ctx->GetInputDim("Input")[input_dim_idx]; ctx->SetOutputDim("Out", output_dim); -- GitLab From fc9d80bc9eb996310d9e3a4b1b7227c030e5b05a Mon Sep 17 00:00:00 2001 From: Aurelius84 Date: Thu, 24 Sep 2020 
12:02:16 +0800 Subject: [PATCH 062/117] [Dy2Stat]rename StaticLayer into StaticFunction (#27487) * rename StaticLayer * rename --- .../dygraph/dygraph_to_static/convert_call_func.py | 10 +++++----- .../dygraph/dygraph_to_static/program_translator.py | 10 +++++----- python/paddle/fluid/dygraph/jit.py | 12 ++++++------ .../unittests/dygraph_to_static/test_declarative.py | 8 ++++---- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/convert_call_func.py b/python/paddle/fluid/dygraph/dygraph_to_static/convert_call_func.py index c837c8eb123..908587c0d9c 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/convert_call_func.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/convert_call_func.py @@ -29,7 +29,7 @@ import six from paddle.fluid.dygraph.dygraph_to_static.convert_operators import convert_len from paddle.fluid.dygraph.dygraph_to_static.logging_utils import TranslatorLogger -from paddle.fluid.dygraph.dygraph_to_static.program_translator import StaticLayer +from paddle.fluid.dygraph.dygraph_to_static.program_translator import StaticFunction from paddle.fluid.dygraph.dygraph_to_static.program_translator import convert_to_static from paddle.fluid.dygraph.dygraph_to_static.program_translator import unwrap_decorators from paddle.fluid.dygraph.layers import Layer @@ -143,14 +143,14 @@ def convert_call(func): # def foo(x): # return x # - # `foo` will be converted into a wrapper class, suppose as `StaticLayer`. - # And `foo.__globals__['foo']` will still return this `StaticLayer` instead of - # `foo` function. So `isinstance(fn, StaticLayer)` is added here. + # `foo` will be converted into a wrapper class, suppose as `StaticFunction`. + # And `foo.__globals__['foo']` will still return this `StaticFunction` instead of + # `foo` function. So `isinstance(fn, StaticFunction)` is added here. 
global_functions = set() for fn in func.__globals__.values(): if inspect.isfunction(fn): global_functions.add(fn) - elif isinstance(fn, StaticLayer): + elif isinstance(fn, StaticFunction): _, fn = unwrap_decorators(fn) global_functions.add(fn) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py b/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py index 3b3b9bbe96f..ddf44d805d1 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py @@ -205,7 +205,7 @@ def unwrap_decorators(func): decorators = [] cur = func while True: - if isinstance(cur, StaticLayer): + if isinstance(cur, StaticFunction): decorators.append(cur) # Note: if `cur` is a method, keep it as bound method of class. instance = cur._class_instance @@ -218,7 +218,7 @@ def unwrap_decorators(func): return decorators, cur -class StaticLayer(object): +class StaticFunction(object): """ Wrapper class to Manage program conversion of decorated function. @@ -226,7 +226,7 @@ class StaticLayer(object): def __init__(self, function, input_spec=None): """ - Initializes a `StaticLayer`. + Initializes a `StaticFunction`. Args: function(callable): A function or method that will be converted into static program. @@ -268,12 +268,12 @@ class StaticLayer(object): In above case, `net(x, y)` will call `net.forward(x, y)` firstly that is a bound method of `Net` instance. After decorated by `@paddle.jit.to_static`, it will firstly to call `__get__` - to parse the class instance correctly instead of the `StaticLayer` instance. + to parse the class instance correctly instead of the `StaticFunction` instance. """ if instance not in self._descriptor_cache: if instance is None: return self - # Note(Aurelius84): To construct new instance of StaticLayer when we + # Note(Aurelius84): To construct new instance of StaticFunction when we # first encouter the bound function of layer and cache it. 
new_static_layer = self._clone() new_static_layer._class_instance = instance diff --git a/python/paddle/fluid/dygraph/jit.py b/python/paddle/fluid/dygraph/jit.py index d0e3d23b04b..194ebafb08e 100644 --- a/python/paddle/fluid/dygraph/jit.py +++ b/python/paddle/fluid/dygraph/jit.py @@ -28,7 +28,7 @@ from paddle.fluid.data_feeder import check_type from paddle.fluid.dygraph.base import program_desc_tracing_guard, switch_to_static_graph from paddle.fluid.dygraph.dygraph_to_static import logging_utils from paddle.fluid.dygraph.dygraph_to_static.logging_utils import set_code_level, set_verbosity -from paddle.fluid.dygraph.dygraph_to_static.program_translator import ProgramTranslator, StaticLayer, unwrap_decorators +from paddle.fluid.dygraph.dygraph_to_static.program_translator import ProgramTranslator, StaticFunction, unwrap_decorators from paddle.fluid.dygraph.io import EXTRA_VAR_INFO_FILENAME, VARIABLE_FILENAME, TranslatedLayer from paddle.fluid.dygraph.layers import Layer from paddle.fluid.executor import Executor, scope_guard @@ -141,7 +141,7 @@ def copy_decorator_attrs(original_func, decorated_obj): Args: original_func(callable): the original decorated function. - decorated_obj(StaticLayer): the target decorated StaticLayer object. + decorated_obj(StaticFunction): the target decorated StaticFunction object. """ decorator_name = "declarative" @@ -198,7 +198,7 @@ def declarative(function=None, input_spec=None): def decorated(python_func): """ - Decorates a python function into a StaticLayer object. + Decorates a python function into a StaticFunction object. """ # Step 1. unwrap the function if it is already decorated. _, python_func = unwrap_decorators(python_func) @@ -206,7 +206,7 @@ def declarative(function=None, input_spec=None): # Step 2. copy some attributes from original python function. 
static_layer = copy_decorator_attrs( original_func=python_func, - decorated_obj=StaticLayer( + decorated_obj=StaticFunction( function=python_func, input_spec=input_spec)) return static_layer @@ -214,7 +214,7 @@ def declarative(function=None, input_spec=None): # for usage: `declarative(foo, ...)` if function is not None: if isinstance(function, Layer): - if isinstance(function.forward, StaticLayer): + if isinstance(function.forward, StaticFunction): class_name = function.__class__.__name__ logging_utils.warn( "`{}.forward` has already been decorated somewhere. It will be redecorated to replace previous one.". @@ -868,7 +868,7 @@ def save(layer, model_path, input_spec=None, config=None): # 2. get program from Layer # TODO(chenweihang): add support for other method, not only forward - if isinstance(layer.forward, StaticLayer): + if isinstance(layer.forward, StaticFunction): concrete_program = layer.forward.concrete_program else: # transform in jit.save, if input_spec is incomplete, declarative will throw error diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py index 450ef7557bc..095eda2a5cb 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py @@ -19,7 +19,7 @@ import paddle import paddle.fluid as fluid from paddle.static import InputSpec from paddle.fluid.dygraph import to_variable, declarative, ProgramTranslator, Layer, jit -from paddle.fluid.dygraph.dygraph_to_static.program_translator import ConcreteProgram, StaticLayer +from paddle.fluid.dygraph.dygraph_to_static.program_translator import ConcreteProgram, StaticFunction from test_basic_api_transformation import dyfunc_to_variable @@ -81,14 +81,14 @@ class SimpleNet(Layer): return z -class TestStaticLayerInstance(unittest.TestCase): +class TestStaticFunctionInstance(unittest.TestCase): def 
test_instance_same_class(self): with fluid.dygraph.guard(fluid.CPUPlace()): net_1 = SimpleNet() net_2 = SimpleNet() - self.assertTrue(isinstance(net_1.forward, StaticLayer)) - self.assertTrue(isinstance(net_2.forward, StaticLayer)) + self.assertTrue(isinstance(net_1.forward, StaticFunction)) + self.assertTrue(isinstance(net_2.forward, StaticFunction)) self.assertNotEqual(net_1.forward, net_2.forward) # convert layer into static progam of net_1 -- GitLab From 29f1560d8fbb1e516dfac5c609e6e869196475a5 Mon Sep 17 00:00:00 2001 From: chalsliu <45041955+chalsliu@users.noreply.github.com> Date: Thu, 24 Sep 2020 12:49:33 +0800 Subject: [PATCH 063/117] Disable ut quickly. --- paddle/scripts/paddle_build.sh | 5 ++++ tools/check_file_diff_approvals.sh | 2 +- tools/is_ut_disabled.py | 40 ++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 tools/is_ut_disabled.py diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 69303013d2a..ac6531a2cc5 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -988,6 +988,11 @@ set +x fi read testcase <<< $(echo "$line"|grep -oEi "\w+$") + if python $PADDLE_ROOT/tools/is_ut_disabled.py $testcase; then + echo $testcase" is disabled." + continue + fi + if [[ "$is_nightly" != "" ]] && [ ${NIGHTLY_MODE:-OFF} == "OFF" ]; then echo $testcase" will only run at night." 
continue diff --git a/tools/check_file_diff_approvals.sh b/tools/check_file_diff_approvals.sh index 84254cc89bb..16e61d7c77a 100644 --- a/tools/check_file_diff_approvals.sh +++ b/tools/check_file_diff_approvals.sh @@ -286,7 +286,7 @@ fi # Get the list of PR authors with unresolved unit test issues pip install PyGithub # For getting PR related data -wget https://paddle-ci.gz.bcebos.com/blk/block.txt --no-check-certificate +wget https://sys-p0.bj.bcebos.com/blk/block.txt --no-check-certificate wget https://sys-p0.bj.bcebos.com/bk-ci/bk.txt --no-check-certificate HASUTFIXED=`python ${PADDLE_ROOT}/tools/check_ut.py | grep "has unit-test to be fixed" || true` if [ "${HASUTFIXED}" != "" ]; then diff --git a/tools/is_ut_disabled.py b/tools/is_ut_disabled.py new file mode 100644 index 00000000000..a21fe39e71e --- /dev/null +++ b/tools/is_ut_disabled.py @@ -0,0 +1,40 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Check whether ut is disabled. """ + +import os +import sys + + +def check_ut(): + """ Get disabled unit tests. 
""" + disable_ut_file = 'disable_ut' + cmd = 'wget -q --no-check-certificate https://sys-p0.bj.bcebos.com/prec/{}'.format( + disable_ut_file) + os.system(cmd) + with open(disable_ut_file) as utfile: + for u in utfile: + if u.rstrip('\r\n') == sys.argv[1]: + exit(0) + exit(1) + + +if __name__ == '__main__': + if len(sys.argv) != 2: + exit(1) + try: + check_ut() + except Exception as e: + print(e) + exit(1) -- GitLab From 78a27a2b0d7ad7b6676dc34ae305faf3ee5b0482 Mon Sep 17 00:00:00 2001 From: LielinJiang <50691816+LielinJiang@users.noreply.github.com> Date: Thu, 24 Sep 2020 12:54:53 +0800 Subject: [PATCH 064/117] Reproduce summary api (#27367) * reproduce summary api --- python/paddle/hapi/model.py | 5 +- python/paddle/hapi/model_summary.py | 219 ++++++++++++++++++++-------- python/paddle/tests/test_model.py | 15 +- 3 files changed, 173 insertions(+), 66 deletions(-) diff --git a/python/paddle/hapi/model.py b/python/paddle/hapi/model.py index d41852c9d7f..53928ebed1b 100644 --- a/python/paddle/hapi/model.py +++ b/python/paddle/hapi/model.py @@ -1813,7 +1813,7 @@ class Model(object): return logs, outputs return logs - def summary(self, input_size=None, batch_size=None, dtype=None): + def summary(self, input_size=None, dtype=None): """Prints a string summary of the network. Args: @@ -1822,7 +1822,6 @@ class Model(object): one input, input_size can be tuple or InputSpec. if model have multiple input, input_size must be a list which contain every input's shape. Default: None. - batch_size (int, optional): batch size of input tensor, Default: None. dtypes (str, optional): if dtypes is None, 'float32' will be used, Default: None. 
Returns: @@ -1859,7 +1858,7 @@ class Model(object): _input_size = input_size else: _input_size = self._inputs - return summary(self.network, _input_size, batch_size, dtype) + return summary(self.network, _input_size, dtype) def _verify_spec(self, specs, is_input=False): out_specs = [] diff --git a/python/paddle/hapi/model_summary.py b/python/paddle/hapi/model_summary.py index d388ba62f2a..3ead3fc295c 100644 --- a/python/paddle/hapi/model_summary.py +++ b/python/paddle/hapi/model_summary.py @@ -25,7 +25,7 @@ from collections import OrderedDict __all__ = ['summary'] -def summary(net, input_size, batch_size=None, dtypes=None): +def summary(net, input_size, dtypes=None): """Prints a string summary of the network. Args: @@ -33,8 +33,8 @@ def summary(net, input_size, batch_size=None, dtypes=None): input_size (tuple|InputSpec|list[tuple|InputSpec]): size of input tensor. if model only have one input, input_size can be tuple or InputSpec. if model have multiple input, input_size must be a list which contain - every input's shape. - batch_size (int, optional): batch size of input tensor, Default: None. + every input's shape. Note that input_size only dim of + batch_size can be None or -1. dtypes (str, optional): if dtypes is None, 'float32' will be used, Default: None. 
Returns: @@ -77,14 +77,12 @@ def summary(net, input_size, batch_size=None, dtypes=None): lenet = LeNet() - params_info = paddle.summary(lenet, (1, 28, 28)) + params_info = paddle.summary(lenet, (1, 1, 28, 28)) print(params_info) """ if isinstance(input_size, InputSpec): - _input_size = tuple(input_size.shape[1:]) - if batch_size is None: - batch_size = input_size.shape[0] + _input_size = tuple(input_size.shape) elif isinstance(input_size, list): _input_size = [] for item in input_size: @@ -96,9 +94,7 @@ def summary(net, input_size, batch_size=None, dtypes=None): type(item)) if isinstance(item, InputSpec): - _input_size.append(tuple(item.shape[1:])) - if batch_size is None: - batch_size = item.shape[0] + _input_size.append(tuple(item.shape)) else: _input_size.append(item) elif isinstance(input_size, int): @@ -106,28 +102,88 @@ def summary(net, input_size, batch_size=None, dtypes=None): else: _input_size = input_size - if batch_size is None: - batch_size = -1 - if not paddle.in_dynamic_mode(): warnings.warn( "Your model was created in static mode, this may not get correct summary information!" ) - result, params_info = summary_string(net, _input_size, batch_size, dtypes) + def _is_shape(shape): + for item in shape: + if isinstance(item, (list, tuple)): + return False + return True + + def _check_shape(shape): + num_unknown = 0 + new_shape = [] + for i in range(len(shape)): + item = shape[i] + if item is None or item == -1: + num_unknown += 1 + if num_unknown > 1: + raise ValueError( + 'Option input_size only the dim of batch_size can be None or -1.' + ) + item = 1 + elif isinstance(item, numbers.Number): + if item <= 0: + raise ValueError( + "Expected element in input size greater than zero, but got {}". 
+ format(item)) + new_shape.append(item) + return tuple(new_shape) + + def _check_input(input_size): + if isinstance(input_size, (list, tuple)) and _is_shape(input_size): + return _check_shape(input_size) + else: + return [_check_input(i) for i in input_size] + + _input_size = _check_input(_input_size) + result, params_info = summary_string(net, _input_size, dtypes) print(result) return params_info -def summary_string(model, input_size, batch_size=-1, dtypes=None): - if dtypes == None: - dtypes = ['float32'] * len(input_size) +def summary_string(model, input_size, dtypes=None): + def _all_is_numper(items): + for item in items: + if not isinstance(item, numbers.Number): + return False + return True + + def _build_dtypes(input_size, dtype): + if dtype is None: + dtype = 'float32' + + if isinstance(input_size, (list, tuple)) and _all_is_numper(input_size): + return [dtype] + else: + return [_build_dtypes(i, dtype) for i in input_size] + + if not isinstance(dtypes, (list, tuple)): + dtypes = _build_dtypes(input_size, dtypes) + + batch_size = 1 summary_str = '' depth = len(list(model.sublayers())) + def _get_shape_from_tensor(x): + if isinstance(x, (paddle.fluid.Variable, paddle.fluid.core.VarBase)): + return list(x.shape) + elif isinstance(x, (list, tuple)): + return [_get_shape_from_tensor(xx) for xx in x] + + def _get_output_shape(output): + if isinstance(output, (list, tuple)): + output_shape = [_get_output_shape(o) for o in output] + else: + output_shape = list(output.shape) + return output_shape + def register_hook(layer): def hook(layer, input, output): class_name = str(layer.__class__).split(".")[-1].split("'")[0] @@ -139,14 +195,18 @@ def summary_string(model, input_size, batch_size=-1, dtypes=None): m_key = "%s-%i" % (class_name, layer_idx + 1) summary[m_key] = OrderedDict() - summary[m_key]["input_shape"] = list(input[0].shape) - summary[m_key]["input_shape"][0] = batch_size - if isinstance(output, (list, tuple)): - summary[m_key]["output_shape"] = [[-1] + 
list(o.shape)[1:] - for o in output] - else: - summary[m_key]["output_shape"] = list(output.shape) - summary[m_key]["output_shape"][0] = batch_size + + try: + summary[m_key]["input_shape"] = _get_shape_from_tensor(input) + except: + warnings.warn('Get layer {} input shape failed!') + summary[m_key]["input_shape"] = [] + + try: + summary[m_key]["output_shape"] = _get_output_shape(output) + except: + warnings.warn('Get layer {} output shape failed!') + summary[m_key]["output_shape"] params = 0 @@ -175,29 +235,22 @@ def summary_string(model, input_size, batch_size=-1, dtypes=None): hooks.append(layer.register_forward_post_hook(hook)) - def _check_input_size(input_sizes): - for input_size in input_sizes: - for item in input_size: - if not isinstance(item, numbers.Number): - raise TypeError( - "Expected item in input size be a number, but got {}". - format(type(item))) - - if item <= 0: - raise ValueError( - "Expected item in input size greater than zero, but got {}". - format(item)) - if isinstance(input_size, tuple): input_size = [input_size] - _check_input_size(input_size) + def build_input(input_size, dtypes): + if isinstance(input_size, (list, tuple)) and _all_is_numper(input_size): + if isinstance(dtypes, (list, tuple)): + dtype = dtypes[0] + else: + dtype = dtypes + return paddle.rand(list(input_size), dtype) + else: + return [ + build_input(i, dtype) for i, dtype in zip(input_size, dtypes) + ] - x = [ - paddle.rand( - [2] + list(in_size), dtype=dtype) - for in_size, dtype in zip(input_size, dtypes) - ] + x = build_input(input_size, dtypes) # create properties summary = OrderedDict() @@ -213,22 +266,65 @@ def summary_string(model, input_size, batch_size=-1, dtypes=None): for h in hooks: h.remove() - table_width = 80 - summary_str += "-" * table_width + "\n" - line_new = "{:>15} {:>20} {:>20} {:>15}".format( - "Layer (type)", "Input Shape", "Output Shape", "Param #") + def _get_str_length(summary): + head_length = { + 'layer_width': 15, + 'input_shape_width': 20, 
+ 'output_shape_width': 20, + 'params_width': 15, + 'table_width': 75 + } + + for layer in summary: + if head_length['output_shape_width'] < len( + str(summary[layer]["output_shape"])): + head_length['output_shape_width'] = len( + str(summary[layer]["output_shape"])) + if head_length['input_shape_width'] < len( + str(summary[layer]["input_shape"])): + head_length['input_shape_width'] = len( + str(summary[layer]["input_shape"])) + if head_length['layer_width'] < len(str(layer)): + head_length['layer_width'] = len(str(layer)) + if head_length['params_width'] < len( + str(summary[layer]["nb_params"])): + head_length['params_width'] = len( + str(summary[layer]["nb_params"])) + + _temp_width = 0 + for k, v in head_length.items(): + if k != 'table_width': + _temp_width += v + + if head_length['table_width'] < _temp_width + 5: + head_length['table_width'] = _temp_width + 5 + + return head_length + + table_width = _get_str_length(summary) + + summary_str += "-" * table_width['table_width'] + "\n" + line_new = "{:^{}} {:^{}} {:^{}} {:^{}}".format( + "Layer (type)", table_width['layer_width'], "Input Shape", + table_width['input_shape_width'], "Output Shape", + table_width['output_shape_width'], "Param #", + table_width['params_width']) summary_str += line_new + "\n" - summary_str += "=" * table_width + "\n" + summary_str += "=" * table_width['table_width'] + "\n" total_params = 0 total_output = 0 trainable_params = 0 + max_length = 0 for layer in summary: # input_shape, output_shape, trainable, nb_params - line_new = "{:>15} {:>20} {:>20} {:>15}".format( - layer, + line_new = "{:^{}} {:^{}} {:^{}} {:^{}}".format( + layer, table_width['layer_width'], str(summary[layer]["input_shape"]), + table_width['input_shape_width'], str(summary[layer]["output_shape"]), - "{0:,}".format(summary[layer]["nb_params"]), ) + table_width['output_shape_width'], + "{0:,}".format(summary[layer]["nb_params"]), + table_width['params_width']) total_params += summary[layer]["nb_params"] try: @@ 
-242,25 +338,32 @@ def summary_string(model, input_size, batch_size=-1, dtypes=None): trainable_params += summary[layer]["nb_params"] summary_str += line_new + "\n" - # assume 4 bytes/number (float on cuda). - total_input_size = abs( - np.prod(sum(input_size, ())) * batch_size * 4. / (1024**2.)) + def _get_input_size(input_size, size): + if isinstance(input_size, (list, tuple)) and _all_is_numper(input_size): + size = abs(np.prod(input_size) * 4. / (1024**2.)) + else: + size = sum([_get_input_size(i, size) for i in input_size]) + return size + + total_input_size = _get_input_size(input_size, 0) + total_output_size = abs(2. * total_output * 4. / (1024**2.)) # x2 for gradients total_params_size = abs(total_params * 4. / (1024**2.)) total_size = total_params_size + total_output_size + total_input_size - summary_str += "=" * table_width + "\n" + summary_str += "=" * table_width['table_width'] + "\n" summary_str += "Total params: {0:,}".format(total_params) + "\n" summary_str += "Trainable params: {0:,}".format(trainable_params) + "\n" summary_str += "Non-trainable params: {0:,}".format(total_params - trainable_params) + "\n" - summary_str += "-" * table_width + "\n" + summary_str += "-" * table_width['table_width'] + "\n" summary_str += "Input size (MB): %0.2f" % total_input_size + "\n" summary_str += "Forward/backward pass size (MB): %0.2f" % total_output_size + "\n" summary_str += "Params size (MB): %0.2f" % total_params_size + "\n" summary_str += "Estimated Total Size (MB): %0.2f" % total_size + "\n" - summary_str += "-" * table_width + "\n" + summary_str += "-" * table_width['table_width'] + "\n" + # return summary return summary_str, { 'total_params': total_params, diff --git a/python/paddle/tests/test_model.py b/python/paddle/tests/test_model.py index 62cc39c1f7b..c89cbbbfbda 100644 --- a/python/paddle/tests/test_model.py +++ b/python/paddle/tests/test_model.py @@ -494,17 +494,22 @@ class TestModelFunction(unittest.TestCase): model.summary(input_size=(20)) 
model.summary(input_size=[(20)]) - model.summary(input_size=(20), batch_size=2) + model.summary(input_size=(20), dtype='float32') def test_summary_nlp(self): paddle.enable_static() - nlp_net = paddle.nn.GRU(input_size=2, hidden_size=3, num_layers=3) - paddle.summary(nlp_net, (1, 2)) + nlp_net = paddle.nn.GRU(input_size=2, + hidden_size=3, + num_layers=3, + direction="bidirectional") + paddle.summary(nlp_net, (1, 1, 2)) + rnn = paddle.nn.LSTM(16, 32, 2) + paddle.summary(rnn, [(-1, 23, 16), ((2, None, 32), (2, -1, 32))]) def test_summary_error(self): with self.assertRaises(TypeError): nlp_net = paddle.nn.GRU(input_size=2, hidden_size=3, num_layers=3) - paddle.summary(nlp_net, (1, '2')) + paddle.summary(nlp_net, (1, 1, '2')) with self.assertRaises(ValueError): nlp_net = paddle.nn.GRU(input_size=2, hidden_size=3, num_layers=3) @@ -512,7 +517,7 @@ class TestModelFunction(unittest.TestCase): paddle.disable_static() nlp_net = paddle.nn.GRU(input_size=2, hidden_size=3, num_layers=3) - paddle.summary(nlp_net, (1, 2)) + paddle.summary(nlp_net, (1, 1, 2)) def test_export_deploy_model(self): for dynamic in [True, False]: -- GitLab From f2c97b6da519138826a3be730f1468c1a5c69389 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Thu, 24 Sep 2020 13:14:27 +0800 Subject: [PATCH 065/117] replace dataset with fake data (#27519) --- .../contrib/tests/test_weight_decay_extend.py | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py b/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py index 906d83fff4f..6000a44ceb6 100644 --- a/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py +++ b/python/paddle/fluid/contrib/tests/test_weight_decay_extend.py @@ -14,6 +14,7 @@ from __future__ import print_function +import six import unittest from functools import partial import numpy as np @@ -24,6 +25,24 @@ import contextlib paddle.enable_static() +def fake_imdb_reader(word_dict_size, + 
sample_num, + lower_seq_len=100, + upper_seq_len=200, + class_dim=2): + def __reader__(): + for _ in six.moves.range(sample_num): + length = np.random.random_integers( + low=lower_seq_len, high=upper_seq_len, size=[1])[0] + ids = np.random.random_integers( + low=0, high=word_dict_size - 1, size=[length]).astype('int64') + label = np.random.random_integers( + low=0, high=class_dim - 1, size=[1]).astype('int64')[0] + yield ids, label + + return __reader__ + + def get_places(): places = [fluid.CPUPlace()] if fluid.core.is_compiled_with_cuda(): @@ -68,10 +87,11 @@ def bow_net(data, class TestWeightDecay(unittest.TestCase): def setUp(self): - self.word_dict = paddle.dataset.imdb.word_dict() - reader = paddle.batch( - paddle.dataset.imdb.train(self.word_dict), batch_size=2)() - self.train_data = [next(reader) for _ in range(5)] + self.word_dict_len = 5147 + batch_size = 2 + reader = fake_imdb_reader(self.word_dict_len, batch_size * 100) + reader = paddle.batch(reader, batch_size=batch_size)() + self.train_data = [next(reader) for _ in range(3)] self.learning_rate = .5 def run_program(self, place, feed_list): @@ -103,7 +123,7 @@ class TestWeightDecay(unittest.TestCase): data = fluid.layers.data( name="words", shape=[1], dtype="int64", lod_level=1) label = fluid.layers.data(name="label", shape=[1], dtype="int64") - avg_cost = model(data, label, len(self.word_dict)) + avg_cost = model(data, label, self.word_dict_len) AdamW = fluid.contrib.extend_with_decoupled_weight_decay( fluid.optimizer.Adam) @@ -127,7 +147,7 @@ class TestWeightDecay(unittest.TestCase): name="words", shape=[1], dtype="int64", lod_level=1) label = fluid.layers.data(name="label", shape=[1], dtype="int64") - avg_cost = model(data, label, len(self.word_dict)) + avg_cost = model(data, label, self.word_dict_len) param_list = [(var, var * self.learning_rate) for var in main_prog.block(0).all_parameters()] -- GitLab From df43905f1295f76f08d98aab5b6cc6875b4597f0 Mon Sep 17 00:00:00 2001 From: wanghuancoder Date: 
Thu, 24 Sep 2020 13:27:13 +0800 Subject: [PATCH 066/117] use iwyu clean include (#27267) * use iwyu clean include, test=develop, test=win * compilation error, test=develop * fix compilation error2, test=develop * fix compilation error3, test=develop * fix compilation error4, test=develop * fix compilation error5, test=develop * fix compilation error6, test=develop * fix compilation error7, test=develop * fix compilation error8, test=develop * fix compilation error8, test=develop * fix compilation error10, test=develop * fix compilation error11, test=develop --- paddle/fluid/framework/attribute.cc | 2 -- paddle/fluid/framework/block_desc.h | 2 ++ paddle/fluid/framework/c/c_api.cc | 10 ------- paddle/fluid/framework/c/c_api.h | 9 ++++++ paddle/fluid/framework/channel.h | 2 +- .../fluid/framework/copy_same_tensor_test.cc | 2 +- .../fluid/framework/data_device_transform.h | 2 ++ paddle/fluid/framework/data_feed.h | 10 +++++++ paddle/fluid/framework/data_feed_factory.cc | 4 +-- paddle/fluid/framework/data_feed_factory.h | 3 ++ .../fluid/framework/data_layout_transform.cc | 2 +- .../fluid/framework/data_layout_transform.h | 8 ++++++ .../framework/data_layout_transform_test.cc | 1 - paddle/fluid/framework/data_transform.cc | 7 ++++- paddle/fluid/framework/data_transform.h | 4 +++ paddle/fluid/framework/data_type.cc | 1 - paddle/fluid/framework/data_type.h | 11 ++++++-- paddle/fluid/framework/data_type_test.cc | 8 ++++++ paddle/fluid/framework/data_type_transform.h | 4 +++ paddle/fluid/framework/dataset_factory.cc | 3 -- paddle/fluid/framework/dataset_factory.h | 1 + paddle/fluid/framework/ddim.h | 1 + paddle/fluid/framework/ddim_test.cc | 1 - .../framework/details/all_reduce_op_handle.h | 11 ++++++++ .../framework/details/broadcast_op_handle.h | 14 ++++++++++ .../details/broadcast_op_handle_test.h | 4 ++- .../fluid/framework/details/build_strategy.h | 12 ++++++++ .../details/computation_op_handle.cc | 2 ++ .../framework/details/computation_op_handle.h | 12 ++++++++ 
.../details/eager_deletion_op_handle.h | 11 ++++++++ .../details/exception_holder_test.cc | 3 +- .../details/fetch_async_op_handle.cc | 7 ++++- .../framework/details/fetch_async_op_handle.h | 12 ++++++++ .../details/fetch_barrier_op_handle.cc | 2 ++ .../details/fetch_barrier_op_handle.h | 11 ++++++++ .../fluid/framework/details/fetch_op_handle.h | 11 ++++++++ .../details/fused_all_reduce_op_handle.h | 12 ++++++++ .../details/fused_broadcast_op_handle.h | 11 ++++++++ .../details/fused_broadcast_op_handle_test.cc | 9 ++++++ .../framework/details/gather_op_handle.h | 8 ++++++ .../details/gather_op_handle_test.cc | 4 +-- .../framework/details/multi_devices_helper.h | 11 ++++++-- .../framework/details/nan_inf_utils_detail.h | 6 ++++ .../fluid/framework/details/op_handle_base.h | 12 ++++++++ .../framework/details/reduce_op_handle.h | 15 ++++++++++ .../fluid/framework/details/rpc_op_handle.h | 10 +++++++ .../details/scale_loss_grad_op_handle.cc | 8 ++++++ .../details/scale_loss_grad_op_handle.h | 12 ++++++++ .../details/scope_buffered_monitor.h | 2 ++ .../details/share_tensor_buffer_functor.cc | 11 ++++++++ .../details/share_tensor_buffer_functor.h | 9 ++++++ .../details/share_tensor_buffer_op_handle.cc | 10 +++++++ .../details/share_tensor_buffer_op_handle.h | 12 ++++++++ .../details/sparse_all_reduce_op_handle.h | 11 ++++++++ paddle/fluid/framework/details/var_handle.h | 8 ++++++ .../framework/details/variable_visitor.cc | 8 ++++++ .../framework/details/variable_visitor.h | 7 +++++ paddle/fluid/framework/device_worker.cc | 4 ++- paddle/fluid/framework/device_worker.h | 12 ++++++++ .../fluid/framework/device_worker_factory.cc | 2 ++ .../fluid/framework/device_worker_factory.h | 3 ++ paddle/fluid/framework/device_worker_test.cc | 3 +- paddle/fluid/framework/dlpack_tensor.cc | 11 ++++++-- paddle/fluid/framework/dlpack_tensor.h | 3 ++ paddle/fluid/framework/dlpack_tensor_test.cc | 6 ++++ paddle/fluid/framework/downpour_worker.cc | 10 +++++-- 
paddle/fluid/framework/downpour_worker_opt.cc | 10 +++---- paddle/fluid/framework/eigen.h | 2 ++ paddle/fluid/framework/executor.h | 6 ++++ paddle/fluid/framework/executor_gc_helper.cc | 13 +++++---- paddle/fluid/framework/executor_gc_helper.h | 5 ++++ paddle/fluid/framework/feed_fetch_method.cc | 9 +++--- paddle/fluid/framework/feed_fetch_method.h | 4 +++ paddle/fluid/framework/fleet/fleet_wrapper.cc | 6 ---- paddle/fluid/framework/fleet/fleet_wrapper.h | 6 ++++ paddle/fluid/framework/fleet/gloo_wrapper.cc | 3 -- paddle/fluid/framework/fleet/heter_wrapper.cc | 16 +++-------- paddle/fluid/framework/fleet/nccl_wrapper.cc | 3 -- paddle/fluid/framework/fleet/nccl_wrapper.h | 7 +++++ paddle/fluid/framework/garbage_collector.h | 7 +++++ paddle/fluid/framework/generator.h | 1 - paddle/fluid/framework/hetercpu_worker.cc | 7 ----- paddle/fluid/framework/heterxpu_trainer.cc | 12 +------- paddle/fluid/framework/inlined_vector.h | 1 + paddle/fluid/framework/inlined_vector_test.cc | 4 +-- .../fluid/framework/io/crypto/cipher_utils.cc | 2 -- .../fluid/framework/io/crypto/cipher_utils.h | 1 + paddle/fluid/framework/io/fs.h | 1 + paddle/fluid/framework/io/shell.h | 1 + .../framework/ir/attention_lstm_fuse_pass.h | 2 ++ .../ir/conv_affine_channel_fuse_pass.cc | 13 +++++++-- .../ir/conv_affine_channel_fuse_pass.h | 3 ++ .../fluid/framework/ir/conv_bn_fuse_pass.cc | 11 ++++++-- paddle/fluid/framework/ir/conv_bn_fuse_pass.h | 3 ++ .../ir/conv_elementwise_add2_act_fuse_pass.h | 2 ++ .../ir/conv_elementwise_add_act_fuse_pass.cc | 1 + .../ir/conv_elementwise_add_act_fuse_pass.h | 2 ++ .../ir/conv_elementwise_add_fuse_pass.cc | 1 + .../ir/conv_elementwise_add_fuse_pass.h | 2 ++ .../fluid/framework/ir/cudnn_placement_pass.h | 1 + .../ir/delete_quant_dequant_op_pass.cc | 4 +-- .../ir/delete_quant_dequant_op_pass.h | 3 ++ .../embedding_eltwise_layernorm_fuse_pass.cc | 2 +- .../embedding_eltwise_layernorm_fuse_pass.h | 9 ++++++ .../ir/embedding_fc_lstm_fuse_pass.h | 2 ++ 
.../ir/fc_elementwise_layernorm_fuse_pass.cc | 2 ++ .../ir/fc_elementwise_layernorm_fuse_pass.h | 2 ++ paddle/fluid/framework/ir/fc_fuse_pass.cc | 4 +-- paddle/fluid/framework/ir/fc_fuse_pass.h | 2 ++ paddle/fluid/framework/ir/fc_gru_fuse_pass.cc | 3 ++ paddle/fluid/framework/ir/fc_gru_fuse_pass.h | 3 ++ .../fluid/framework/ir/fc_lstm_fuse_pass.cc | 3 ++ paddle/fluid/framework/ir/fc_lstm_fuse_pass.h | 2 ++ paddle/fluid/framework/ir/fuse_bn_act_pass.cc | 11 ++++++-- paddle/fluid/framework/ir/fuse_bn_act_pass.h | 4 +++ .../framework/ir/fuse_elewise_add_act_pass.h | 4 +++ .../fuse_adam_op_pass.cc | 13 +++++---- .../fuse_momentum_op_pass.cc | 11 +++++--- .../fuse_optimizer_op_pass.h | 11 ++++++++ .../fuse_sgd_op_pass.cc | 12 +++++--- paddle/fluid/framework/ir/fuse_pass_base.cc | 8 ++++++ paddle/fluid/framework/ir/fuse_pass_base.h | 10 +++++++ .../ir/fuse_relu_depthwise_conv_pass.h | 3 ++ .../ir/fusion_group/code_generator.h | 3 ++ .../ir/fusion_group/code_generator_tester.cc | 12 +++++--- .../fusion_group/elementwise_group_detector.h | 9 ++++++ .../ir/fusion_group/fusion_group_pass.h | 6 ++++ .../framework/ir/fusion_group/operation.h | 1 + paddle/fluid/framework/ir/graph.h | 7 +++++ paddle/fluid/framework/ir/graph_helper.h | 2 ++ .../fluid/framework/ir/graph_helper_test.cc | 1 - .../framework/ir/graph_pattern_detector.h | 10 +++++++ .../ir/graph_pattern_detector_tester.cc | 6 ++-- .../framework/ir/graph_to_program_pass.h | 2 ++ .../ir/graph_to_program_pass_test.cc | 3 +- paddle/fluid/framework/ir/graph_traits.cc | 5 ++-- paddle/fluid/framework/ir/graph_traits.h | 3 ++ paddle/fluid/framework/ir/graph_viz_pass.h | 2 ++ .../ir/identity_scale_op_clean_pass.cc | 2 ++ .../ir/identity_scale_op_clean_pass.h | 2 ++ paddle/fluid/framework/ir/is_test_pass.cc | 2 ++ paddle/fluid/framework/ir/is_test_pass.h | 2 ++ .../framework/ir/lock_free_optimize_pass.h | 1 + ...uffer_shared_cross_op_memory_reuse_pass.cc | 15 +++++----- .../buffer_shared_inplace_op_pass.cc | 5 +++- 
.../memory_optimize_pass/memory_reuse_pass.cc | 13 ++++++--- .../memory_optimize_pass/memory_reuse_pass.h | 13 +++++++++ .../ir/memory_optimize_pass/op_graph_view.cc | 10 +++++-- .../ir/memory_optimize_pass/op_graph_view.h | 9 ++++++ .../reference_count_pass_helper.cc | 2 -- .../reference_count_pass_helper.h | 3 ++ .../conv_activation_mkldnn_fuse_pass.cc | 9 +++++- .../mkldnn/conv_activation_mkldnn_fuse_pass.h | 3 ++ .../ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc | 1 - .../ir/mkldnn/conv_bias_mkldnn_fuse_pass.h | 4 +++ .../conv_concat_relu_mkldnn_fuse_pass.cc | 2 ++ .../conv_concat_relu_mkldnn_fuse_pass.h | 3 ++ .../conv_elementwise_add_mkldnn_fuse_pass.h | 7 +++++ .../framework/ir/mkldnn/cpu_quantize_pass.cc | 5 ++-- .../framework/ir/mkldnn/cpu_quantize_pass.h | 4 +++ .../ir/mkldnn/cpu_quantize_placement_pass.cc | 3 +- .../ir/mkldnn/cpu_quantize_placement_pass.h | 3 ++ .../ir/mkldnn/cpu_quantize_squash_pass.cc | 5 +++- .../ir/mkldnn/cpu_quantize_squash_pass.h | 3 ++ .../ir/mkldnn/depthwise_conv_mkldnn_pass.cc | 3 ++ .../ir/mkldnn/depthwise_conv_mkldnn_pass.h | 2 ++ .../framework/ir/mkldnn/fc_mkldnn_pass.cc | 15 ++++++---- .../framework/ir/mkldnn/fc_mkldnn_pass.h | 3 ++ .../matmul_transpose_reshape_fuse_pass.h | 5 +++- .../ir/mkldnn/mkldnn_placement_pass.h | 1 + ...eshape_transpose_matmul_mkldnn_fuse_pass.h | 3 ++ .../ir/mkldnn/scale_matmul_fuse_pass.cc | 4 +++ .../ir/mkldnn/scale_matmul_fuse_pass.h | 2 ++ .../framework/ir/multi_batch_merge_pass.h | 2 ++ .../multi_devices_graph_pass.h | 15 ++++++++++ .../sequential_execution_pass.cc | 11 ++++---- .../set_reader_device_info_utils.h | 8 ++++++ .../ir/multihead_matmul_fuse_pass.cc | 4 +-- .../framework/ir/multihead_matmul_fuse_pass.h | 9 ++++++ paddle/fluid/framework/ir/node.cc | 1 - paddle/fluid/framework/ir/node.h | 8 ++++++ paddle/fluid/framework/ir/pass.cc | 12 +++++--- paddle/fluid/framework/ir/pass.h | 2 ++ paddle/fluid/framework/ir/pass_builder.cc | 2 ++ paddle/fluid/framework/ir/pass_builder.h | 3 ++ 
paddle/fluid/framework/ir/pass_test.cc | 8 ++++-- .../fluid/framework/ir/placement_pass_base.h | 3 ++ .../ir/repeated_fc_relu_fuse_pass.cc | 3 +- .../framework/ir/repeated_fc_relu_fuse_pass.h | 3 ++ .../framework/ir/runtime_context_cache_pass.h | 3 ++ .../framework/ir/seq_concat_fc_fuse_pass.cc | 3 -- .../framework/ir/seq_concat_fc_fuse_pass.h | 2 ++ .../ir/seqconv_eltadd_relu_fuse_pass.cc | 3 ++ .../ir/seqconv_eltadd_relu_fuse_pass.h | 3 ++ .../framework/ir/seqpool_concat_fuse_pass.cc | 9 +++++- .../framework/ir/seqpool_concat_fuse_pass.h | 3 ++ .../ir/seqpool_cvm_concat_fuse_pass.cc | 5 ++-- .../ir/seqpool_cvm_concat_fuse_pass.h | 3 ++ .../ir/shuffle_channel_detect_pass.cc | 3 -- .../ir/shuffle_channel_detect_pass.h | 1 + .../ir/simplify_with_basic_ops_pass.cc | 3 +- .../ir/simplify_with_basic_ops_pass.h | 4 +++ .../framework/ir/skip_layernorm_fuse_pass.cc | 3 +- .../framework/ir/skip_layernorm_fuse_pass.h | 2 ++ .../framework/ir/squared_mat_sub_fuse_pass.h | 3 ++ .../fluid/framework/ir/subgraph_detector.cc | 7 ++--- paddle/fluid/framework/ir/subgraph_detector.h | 4 +++ .../framework/ir/sync_batch_norm_pass.cc | 5 ++-- .../ir/transpose_flatten_concat_fuse_pass.cc | 1 - paddle/fluid/framework/lod_rank_table.h | 1 + paddle/fluid/framework/lod_tensor.cc | 15 ++++------ paddle/fluid/framework/lod_tensor.h | 10 +++++++ paddle/fluid/framework/lod_tensor_test.cc | 3 -- paddle/fluid/framework/mixed_vector.h | 4 +-- paddle/fluid/framework/mixed_vector_test.cc | 4 +-- paddle/fluid/framework/naive_executor.h | 6 ++++ .../framework/no_need_buffer_vars_inference.h | 1 + paddle/fluid/framework/op_call_stack.h | 7 +++++ paddle/fluid/framework/op_compatible_info.h | 5 ++++ .../framework/op_compatible_info_test.cc | 7 +++-- paddle/fluid/framework/op_desc.h | 2 ++ paddle/fluid/framework/op_info.cc | 2 -- paddle/fluid/framework/op_info.h | 5 ++++ paddle/fluid/framework/op_kernel_type.h | 1 + paddle/fluid/framework/op_kernel_type_test.cc | 2 +- 
paddle/fluid/framework/op_proto_maker_test.cc | 6 ++++ paddle/fluid/framework/op_registry.cc | 2 -- paddle/fluid/framework/op_registry.h | 6 ++++ .../framework/op_version_registry_test.cc | 1 - paddle/fluid/framework/operator.h | 11 +++++++- paddle/fluid/framework/program_desc.h | 1 + paddle/fluid/framework/program_desc_test.cc | 4 ++- paddle/fluid/framework/prune.h | 1 + paddle/fluid/framework/prune_test.cc | 4 --- paddle/fluid/framework/pull_dense_worker.cc | 6 +++- paddle/fluid/framework/rw_lock_test.cc | 1 - paddle/fluid/framework/save_load_util.h | 2 ++ paddle/fluid/framework/save_load_util_test.cc | 3 -- paddle/fluid/framework/scope.h | 6 ++++ paddle/fluid/framework/scope_pool.cc | 4 +-- paddle/fluid/framework/scope_pool.h | 3 ++ paddle/fluid/framework/scope_test.cc | 8 +++++- paddle/fluid/framework/selected_rows.cc | 6 ++++ paddle/fluid/framework/selected_rows.h | 9 ++++++ paddle/fluid/framework/shape_inference.cc | 5 ---- paddle/fluid/framework/tensor.cc | 9 +++++- paddle/fluid/framework/tensor.h | 9 ++++++ paddle/fluid/framework/tensor_test.cc | 8 +++++- paddle/fluid/framework/tensor_util.cc | 2 +- paddle/fluid/framework/tensor_util.h | 3 ++ paddle/fluid/framework/tensor_util_test.cc | 2 +- paddle/fluid/framework/threadpool.cc | 2 -- paddle/fluid/framework/threadpool.h | 1 + paddle/fluid/framework/trainer.h | 12 +++++++- paddle/fluid/framework/trainer_factory.cc | 4 +-- paddle/fluid/framework/trainer_factory.h | 3 ++ paddle/fluid/framework/transfer_scope_cache.h | 4 +++ paddle/fluid/framework/tuple.h | 1 + paddle/fluid/framework/tuple_test.cc | 5 +--- paddle/fluid/framework/unroll_array_ops.h | 1 + .../fluid/framework/unroll_array_ops_test.cc | 3 +- paddle/fluid/framework/unused_var_check.cc | 4 +-- paddle/fluid/framework/unused_var_check.h | 1 - paddle/fluid/framework/var_desc.cc | 2 -- paddle/fluid/framework/var_desc.h | 1 + paddle/fluid/framework/var_type_inference.h | 3 +- .../framework/var_type_inference_test.cc | 4 +++ 
paddle/fluid/framework/var_type_traits.h | 7 +++-- paddle/fluid/framework/variable_helper.h | 2 ++ paddle/fluid/framework/variable_test.cc | 6 +--- paddle/fluid/framework/version.cc | 2 +- paddle/fluid/imperative/all_reduce.h | 8 ++++++ paddle/fluid/imperative/amp_auto_cast.cc | 7 ++--- paddle/fluid/imperative/amp_auto_cast.h | 2 ++ paddle/fluid/imperative/data_loader.cc | 4 --- paddle/fluid/imperative/jit/op_desc_meta.cc | 1 - paddle/fluid/imperative/jit/op_desc_meta.h | 1 + .../imperative/jit/program_desc_tracer.cc | 7 ++++- .../imperative/jit/program_desc_tracer.h | 7 +++++ paddle/fluid/imperative/layer.h | 9 ++++++ paddle/fluid/imperative/prepared_operator.h | 11 ++++++++ paddle/fluid/imperative/profiler.cc | 3 +- .../imperative/tests/nccl_context_test.cc | 2 +- .../analysis/ir_passes/subgraph_util.cc | 9 +++++- .../analysis/ir_passes/subgraph_util.h | 6 ++++ .../ir_passes/tensorrt_subgraph_pass.h | 10 +++++++ .../passes/adjust_cudnn_workspace_size_pass.h | 2 ++ .../passes/inference_op_replace_pass.h | 2 ++ .../analysis/passes/ir_graph_clean_pass.h | 3 ++ .../passes/ir_graph_to_program_pass.h | 3 ++ .../analysis/passes/memory_optimize_pass.cc | 21 +++++++------- .../analysis/passes/memory_optimize_pass.h | 9 ++++++ paddle/fluid/inference/api/analysis_config.cc | 4 +-- paddle/fluid/inference/api/api_impl.h | 8 ++++++ .../api/details/reset_tensor_array.cc | 6 ++++ .../api/details/reset_tensor_array.h | 8 ++++++ paddle/fluid/inference/api/helper.h | 1 + .../inference/api/mkldnn_quantizer_config.cc | 2 ++ .../fluid/inference/api/paddle_pass_builder.h | 1 + .../tensorrt/convert/activation_op.cc | 21 +++++++++++++- .../tensorrt/convert/batch_norm_op.cc | 13 ++++++++- .../inference/tensorrt/convert/concat_op.cc | 9 ++++++ .../inference/tensorrt/convert/conv2d_op.cc | 9 ++++++ .../inference/tensorrt/convert/dropout_op.cc | 9 ++++++ .../tensorrt/convert/emb_eltwise_layernorm.cc | 9 ++++++ .../fluid/inference/tensorrt/convert/fc_op.cc | 9 ++++++ 
.../inference/tensorrt/convert/gelu_op.cc | 12 ++++++++ .../tensorrt/convert/hard_sigmoid_op.cc | 9 ++++++ .../tensorrt/convert/hard_swish_op.cc | 12 ++++++++ .../tensorrt/convert/instance_norm_op.cc | 12 ++++++++ .../tensorrt/convert/leaky_relu_op.cc | 12 ++++++++ .../inference/tensorrt/convert/mul_op.cc | 9 ++++++ .../inference/tensorrt/convert/pad_op.cc | 9 ++++++ .../inference/tensorrt/convert/pool2d_op.cc | 9 ++++++ .../inference/tensorrt/convert/scale_op.cc | 9 ++++++ .../tensorrt/convert/shuffle_channel_op.cc | 9 ++++++ .../inference/tensorrt/convert/softmax_op.cc | 9 ++++++ .../inference/tensorrt/convert/swish_op.cc | 12 ++++++++ paddle/fluid/inference/tensorrt/engine.cc | 7 +++-- paddle/fluid/inference/tensorrt/engine.h | 12 ++++++++ paddle/fluid/inference/tensorrt/op_teller.cc | 6 ++++ paddle/fluid/inference/tensorrt/op_teller.h | 7 +++++ .../tensorrt/plugin/trt_plugin_factory.h | 1 + .../fluid/inference/tensorrt/test_engine.cc | 3 -- .../inference/tensorrt/trt_int8_calibrator.h | 1 + paddle/fluid/inference/utils/benchmark.cc | 2 +- paddle/fluid/inference/utils/io_utils.h | 5 ++++ .../memory/allocation/aligned_allocator.h | 1 + paddle/fluid/memory/allocation/allocator.h | 1 + .../allocator_facade_abs_flags_test.cc | 4 +-- .../allocator_facade_frac_flags_test.cc | 4 +-- .../memory/allocation/allocator_strategy.cc | 2 +- .../auto_growth_best_fit_allocator.h | 1 + .../auto_growth_best_fit_allocator_test.cc | 2 ++ .../memory/allocation/best_fit_allocator.cc | 6 +--- .../memory/allocation/best_fit_allocator.h | 7 +++++ .../memory/allocation/buffered_allocator.cc | 3 -- .../memory/allocation/buffered_allocator.h | 1 + .../fluid/memory/allocation/cpu_allocator.cc | 2 -- .../memory/allocation/locked_allocator.h | 1 + .../fluid/memory/allocation/mmap_allocator.cc | 5 ---- .../memory/allocation/mmap_allocator_test.cc | 2 -- .../allocation/naive_best_fit_allocator.h | 2 ++ .../memory/allocation/pinned_allocator.cc | 3 -- .../allocation/thread_local_allocator.h | 1 + 
paddle/fluid/memory/detail/buddy_allocator.cc | 2 -- .../memory/detail/buddy_allocator_test.cc | 3 -- .../memory/detail/system_allocator_test.cc | 1 - paddle/fluid/memory/malloc.h | 2 ++ .../fluid/operators/activation_cudnn_op.cu.cc | 9 ++++-- .../fluid/operators/array_to_lod_tensor_op.cc | 15 +++++++--- paddle/fluid/operators/assert_op.cc | 16 ++++++++++- paddle/fluid/operators/assign_op.cc | 16 ++++++++++- paddle/fluid/operators/assign_op.h | 7 +++++ paddle/fluid/operators/assign_op_test.cc | 2 -- paddle/fluid/operators/assign_value_op.cc | 17 ++++++++++- paddle/fluid/operators/assign_value_op.h | 1 + .../fluid/operators/beam_search_decode_op.cc | 14 +++++++++- .../collective/c_allreduce_max_op.cc | 15 ++++++++++ .../collective/c_allreduce_max_op.cu.cc | 7 +++++ .../collective/c_allreduce_min_op.cc | 15 ++++++++++ .../collective/c_allreduce_min_op.cu.cc | 7 +++++ .../collective/c_allreduce_prod_op.cc | 15 ++++++++++ .../collective/c_allreduce_prod_op.cu.cc | 7 +++++ .../collective/c_allreduce_sum_op.cc | 13 +++++++++ .../collective/c_allreduce_sum_op.cu.cc | 7 +++++ .../operators/collective/c_comm_init_op.cc | 14 ++++------ .../operators/collective/c_gen_nccl_id_op.cc | 24 ++++++++-------- .../operators/collective/c_reduce_max_op.cc | 15 ++++++++++ .../collective/c_reduce_max_op.cu.cc | 7 +++++ .../operators/collective/c_reduce_min_op.cc | 15 ++++++++++ .../collective/c_reduce_min_op.cu.cc | 7 +++++ .../operators/collective/c_reduce_prod_op.cc | 15 ++++++++++ .../collective/c_reduce_prod_op.cu.cc | 7 +++++ .../operators/collective/c_reduce_sum_op.cc | 15 ++++++++++ .../collective/c_reduce_sum_op.cu.cc | 7 +++++ .../collective/c_sync_calc_stream_op.cc | 14 ++++------ .../collective/c_sync_comm_stream_op.cc | 13 ++++----- .../operators/common_infer_shape_functions.cc | 7 ++++- .../operators/common_infer_shape_functions.h | 6 ++++ .../controlflow/conditional_block_infer_op.cc | 12 ++++++++ .../controlflow/conditional_block_op.h | 1 + 
.../conditional_block_op_helper.cc | 9 +++++- .../controlflow/conditional_block_op_helper.h | 7 +++++ .../controlflow/conditional_block_op_test.cc | 6 +--- paddle/fluid/operators/controlflow/feed_op.cc | 13 ++++++++- .../operators/controlflow/get_places_op.cc | 14 +++++++++- .../fluid/operators/controlflow/op_variant.h | 7 +++++ .../controlflow/recurrent_op_helper.cc | 11 ++++---- .../controlflow/recurrent_op_helper.h | 6 ++++ .../controlflow/tensor_array_read_write_op.cc | 11 +++++++- .../fluid/operators/controlflow/while_op.cc | 11 ++++++-- .../operators/controlflow/while_op_helper.h | 8 ++++++ paddle/fluid/operators/cudnn_lstm_op.cu.cc | 9 ++++-- paddle/fluid/operators/delete_var_op.cc | 13 +++++++++ .../fluid/operators/dequantize_abs_max_op.cc | 17 ++++++++++- .../fluid/operators/dequantize_abs_max_op.h | 8 ++++++ paddle/fluid/operators/dequantize_log_op.cc | 18 ++++++++++-- paddle/fluid/operators/dequantize_log_op.h | 7 +++++ .../async_sparse_param_update_recorder.h | 3 +- ...async_sparse_param_update_recorder_test.cc | 2 -- .../distributed/collective_client.cc | 6 ++-- .../operators/distributed/collective_client.h | 13 ++++++++- .../distributed/collective_server.cc | 8 +----- .../operators/distributed/collective_server.h | 8 +++++- .../distributed/collective_server_test.cc | 16 ++++------- .../operators/distributed/communicator.h | 3 +- .../distributed/communicator_test.cc | 4 --- .../grpc/grpc_bytebuffer_stream.cc | 4 +++ .../distributed/grpc/grpc_bytebuffer_stream.h | 4 +++ .../operators/distributed/grpc/grpc_client.h | 13 ++++++++- .../operators/distributed/grpc/grpc_serde.cc | 21 +++++++++----- .../operators/distributed/grpc/grpc_serde.h | 14 +++++++++- .../operators/distributed/grpc/grpc_server.cc | 14 ++++++++++ .../operators/distributed/grpc/grpc_server.h | 4 +++ .../grpc/grpc_variable_response.cc | 19 +++++++++++-- .../distributed/grpc/grpc_variable_response.h | 19 ++++++++++--- .../distributed/heart_beat_monitor.cc | 2 +- 
.../distributed/heart_beat_monitor.h | 7 ++--- .../distributed/heart_beat_monitor_test.cc | 3 -- .../operators/distributed/large_scale_kv.h | 1 - .../distributed/parameter_prefetch.cc | 22 +++++++-------- .../distributed/parameter_prefetch.h | 7 +++++ .../operators/distributed/parameter_recv.cc | 21 +++++++------- .../operators/distributed/parameter_send.cc | 27 ++++++++++-------- .../distributed/request_handler_impl.h | 8 +++++- .../fluid/operators/distributed/rpc_client.h | 11 +++++++- .../fluid/operators/distributed/rpc_server.cc | 14 ++++++++-- .../fluid/operators/distributed/rpc_server.h | 12 ++++++++ .../operators/distributed/sendrecvop_utils.cc | 19 +++++++------ .../operators/distributed/sendrecvop_utils.h | 16 +++++++++++ .../operators/distributed/varhandle_test.cc | 5 ---- .../operators/distributed/variable_response.h | 24 +++++++++++++--- .../distributed_ops/checkpoint_notify_op.cc | 20 ++++++++----- .../distributed_ops/fetch_barrier_op.cc | 24 +++++++++++----- .../distributed_ops/fl_listen_and_serv_op.h | 16 +++++++++++ .../distributed_ops/gen_nccl_id_op.cc | 16 +++++++---- .../distributed_ops/listen_and_serv_op.h | 16 +++++++++++ .../operators/distributed_ops/prefetch_op.cc | 23 +++++++++++---- .../operators/distributed_ops/recv_op.cc | 26 +++++++++++------ .../distributed_ops/ref_by_trainer_id_op.cc | 13 +++++++++ .../distributed_ops/ref_by_trainer_id_op.h | 1 + .../distributed_ops/send_barrier_op.cc | 23 ++++++++++----- .../operators/distributed_ops/send_op.cc | 28 +++++++++++-------- .../elementwise/elementwise_add_op.cc | 13 +++++++++ .../elementwise/elementwise_add_op.h | 1 + .../elementwise/elementwise_floordiv_op.cc | 17 +++++++++++ .../elementwise/elementwise_max_op.cc | 16 ++++++++++- .../elementwise/elementwise_min_op.cc | 16 ++++++++++- .../elementwise/elementwise_min_op.h | 1 + .../elementwise/elementwise_mod_op.cc | 17 +++++++++++ .../elementwise/elementwise_pow_op.cc | 16 ++++++++++- .../elementwise/elementwise_pow_op.h | 1 + 
.../elementwise/elementwise_sub_op.cc | 16 ++++++++++- .../mkldnn/elementwise_add_mkldnn_op.cc | 10 +++++++ .../test_elementwise_add_grad_grad.cc | 12 +------- .../test_elementwise_add_op_inplace.cc | 5 +--- paddle/fluid/operators/enqueue_op.cc | 15 ++++++++-- paddle/fluid/operators/eye_op.h | 2 +- .../fusion_transpose_flatten_concat_op.cu.cc | 7 ++++- paddle/fluid/operators/gather_test.cc | 3 -- .../operators/grid_sampler_cudnn_op.cu.cc | 6 ++++ paddle/fluid/operators/gru_op.cu.cc | 7 +++++ paddle/fluid/operators/hash_op.cc | 16 +++++++++++ paddle/fluid/operators/hash_op.h | 1 + paddle/fluid/operators/increment_op.cc | 16 ++++++++++- paddle/fluid/operators/isfinite_op.cc | 18 +++++++++++- paddle/fluid/operators/isfinite_op.h | 7 +++++ paddle/fluid/operators/isfinite_v2_op.cc | 25 +++++++++++++++-- paddle/fluid/operators/isfinite_v2_op.h | 7 +++++ paddle/fluid/operators/jit/gen/act.h | 1 + paddle/fluid/operators/jit/gen/blas.h | 1 + paddle/fluid/operators/jit/gen/embseqpool.cc | 3 +- paddle/fluid/operators/jit/gen/embseqpool.h | 1 + paddle/fluid/operators/jit/gen/gru.cc | 2 ++ paddle/fluid/operators/jit/gen/gru.h | 1 + paddle/fluid/operators/jit/gen/hopv.h | 1 + paddle/fluid/operators/jit/gen/lstm.cc | 2 ++ paddle/fluid/operators/jit/gen/lstm.h | 1 + paddle/fluid/operators/jit/gen/matmul.cc | 3 +- paddle/fluid/operators/jit/gen/matmul.h | 1 + paddle/fluid/operators/jit/gen/seqpool.h | 1 + paddle/fluid/operators/jit/gen/sgd.cc | 3 +- paddle/fluid/operators/jit/gen/sgd.h | 1 + paddle/fluid/operators/jit/gen/vbroadcast.h | 1 + paddle/fluid/operators/jit/gen_base.h | 1 + paddle/fluid/operators/jit/helper.h | 3 ++ paddle/fluid/operators/jit/kernel_pool.cc | 3 -- paddle/fluid/operators/jit/kernel_pool.h | 3 ++ .../jit/more/intrinsic/crf_decoding.h | 1 + .../operators/jit/more/intrinsic/layer_norm.h | 1 + paddle/fluid/operators/jit/more/mix/mix.h | 1 + paddle/fluid/operators/jit/more/mkl/mkl.h | 1 + paddle/fluid/operators/jit/refer/refer.h | 1 + 
paddle/fluid/operators/label_smooth_op.cc | 16 ++++++++++- paddle/fluid/operators/layer_norm_op.h | 9 ++++++ paddle/fluid/operators/lod_array_length_op.cc | 14 +++++++++- paddle/fluid/operators/lod_rank_table_op.cc | 15 +++++++++- .../fluid/operators/lod_tensor_to_array_op.cc | 15 ++++++---- paddle/fluid/operators/math/beam_search.cc | 12 ++++++-- .../fluid/operators/math/beam_search_test.cc | 2 +- paddle/fluid/operators/math/blas.cc | 2 +- paddle/fluid/operators/math/blas.h | 7 +++++ paddle/fluid/operators/math/blas_impl.h | 6 ++-- .../fluid/operators/math/concat_and_split.cc | 12 +++++++- paddle/fluid/operators/math/concat_test.cc | 2 +- .../fluid/operators/math/context_project.cc | 6 ++++ paddle/fluid/operators/math/context_project.h | 2 ++ .../fluid/operators/math/cos_sim_functor.cc | 6 ++++ paddle/fluid/operators/math/cos_sim_functor.h | 2 ++ paddle/fluid/operators/math/cpu_vec.h | 1 + paddle/fluid/operators/math/cpu_vec_test.cc | 4 +-- paddle/fluid/operators/math/cross_entropy.cc | 6 ++++ paddle/fluid/operators/math/gru_compute.cc | 7 +++++ paddle/fluid/operators/math/im2col.cc | 8 +++++- paddle/fluid/operators/math/lstm_compute.cc | 7 +++++ .../fluid/operators/math/matrix_bit_code.cc | 2 -- .../fluid/operators/math/matrix_inverse.cu.cc | 11 ++++++-- paddle/fluid/operators/math/pooling.cc | 3 -- paddle/fluid/operators/math/sample_prob.cc | 6 ++++ paddle/fluid/operators/math/sample_prob.h | 7 +++++ .../math/selected_rows_functor_test.cu.cc | 1 - paddle/fluid/operators/math/sequence2batch.cc | 6 ++++ .../fluid/operators/math/sequence_padding.cc | 10 +++++++ .../operators/math/sequence_padding_test.cc | 2 +- paddle/fluid/operators/math/sequence_scale.cc | 6 ++++ paddle/fluid/operators/math/sequence_scale.h | 7 +++++ paddle/fluid/operators/math/vol2col.cc | 7 ++++- paddle/fluid/operators/math/vol2col_test.cc | 3 +- paddle/fluid/operators/matmul_v2_op.h | 6 ++-- paddle/fluid/operators/max_sequence_len_op.cc | 14 +++++++++- 
paddle/fluid/operators/merge_lod_tensor_op.cc | 13 ++++++++- .../operators/mkldnn/activation_mkldnn_op.cc | 9 ++++++ .../operators/mkldnn/batch_norm_mkldnn_op.cc | 10 ++++++- .../fluid/operators/mkldnn/conv_mkldnn_op.cc | 8 ++++-- paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc | 15 +++++++--- .../fluid/operators/mkldnn/lrn_mkldnn_op.cc | 11 ++++++-- .../operators/mkldnn/matmul_mkldnn_op.cc | 8 +++++- .../fluid/operators/mkldnn/mul_mkldnn_op.cc | 14 +++++++--- .../fluid/operators/mkldnn/pool_mkldnn_op.cc | 1 - .../operators/mkldnn/softmax_mkldnn_op.cc | 12 ++++++-- .../fluid/operators/mkldnn/sum_mkldnn_op.cc | 14 +++++++--- .../fluid/operators/nccl/nccl_gpu_common.cc | 1 - .../fluid/operators/op_debug_string_test.cc | 2 +- paddle/fluid/operators/print_op.cc | 16 ++++++++--- paddle/fluid/operators/rank_loss_op.cc | 16 ++++++++++- .../fluid/operators/reader/buffered_reader.h | 1 + paddle/fluid/operators/reader/py_reader.cc | 1 - paddle/fluid/operators/reader/py_reader.h | 3 ++ .../reader/reader_blocking_queue_test.cc | 5 +--- .../operators/reader/reader_op_registry.cc | 8 ++++-- .../operators/reader/reader_op_registry.h | 8 ++++++ paddle/fluid/operators/recurrent_op.cc | 10 +++++-- paddle/fluid/operators/recurrent_op.h | 6 ++++ .../operators/reduce_ops/frobenius_norm_op.cc | 15 +++++++++- .../operators/reduce_ops/reduce_all_op.cc | 15 ++++++++++ .../operators/reduce_ops/reduce_any_op.cc | 15 ++++++++++ .../operators/reduce_ops/reduce_prod_op.cc | 13 +++++++++ .../operators/reduce_ops/reduce_sum_op.cc | 15 +++++++++- .../reorder_lod_tensor_by_rank_op.cc | 13 ++++++++- paddle/fluid/operators/reshape_op.cc | 17 ++++++++++- .../fluid/operators/rnn_memory_helper_op.cc | 11 ++++++++ paddle/fluid/operators/scale_op.cc | 14 +++++++++- paddle/fluid/operators/scatter_test.cc | 5 ++-- .../sequence_ops/sequence_concat_op.cu.cc | 7 +++++ paddle/fluid/operators/softmax_cudnn_op.cu.cc | 8 +++++- paddle/fluid/operators/split_lod_tensor_op.cc | 13 ++++++++- 
paddle/fluid/operators/strided_memcpy.h | 2 ++ paddle/fluid/operators/strided_memcpy_test.cc | 2 +- paddle/fluid/operators/tensor_formatter.cc | 3 +- paddle/fluid/operators/tensor_formatter.h | 6 ++++ .../operators/tensorrt/tensorrt_engine_op.cc | 3 -- .../operators/tensorrt/tensorrt_engine_op.h | 12 ++++++++ .../test_leaky_relu_grad_grad_functor.h | 1 + paddle/fluid/platform/bfloat16.h | 6 ++++ paddle/fluid/platform/bfloat16_test.cc | 2 -- paddle/fluid/platform/collective_helper.cc | 4 --- paddle/fluid/platform/cpu_info.cc | 1 - paddle/fluid/platform/cuda_resource_pool.h | 1 + paddle/fluid/platform/cudnn_desc.h | 7 +++++ paddle/fluid/platform/cudnn_helper.h | 6 ++++ paddle/fluid/platform/device_code.cc | 2 ++ paddle/fluid/platform/device_code.h | 1 + paddle/fluid/platform/device_context.h | 9 +++++- .../fluid/platform/device_memory_aligment.h | 1 + paddle/fluid/platform/device_tracer.h | 2 ++ paddle/fluid/platform/dynload/cublas.h | 1 + paddle/fluid/platform/dynload/cuda_driver.h | 1 + paddle/fluid/platform/dynload/cudnn.h | 4 +-- paddle/fluid/platform/dynload/cupti.cc | 1 - paddle/fluid/platform/dynload/curand.h | 3 +- paddle/fluid/platform/dynload/cusolver.h | 3 +- .../fluid/platform/dynload/dynamic_loader.cc | 3 -- paddle/fluid/platform/dynload/mklml.h | 1 + paddle/fluid/platform/dynload/nccl.h | 2 +- paddle/fluid/platform/dynload/nvrtc.h | 1 + paddle/fluid/platform/dynload/warpctc.h | 1 + paddle/fluid/platform/enforce.h | 6 ++++ paddle/fluid/platform/errors_test.cc | 1 - paddle/fluid/platform/float16_test.cc | 4 +-- paddle/fluid/platform/lodtensor_printer.cc | 10 +++++-- paddle/fluid/platform/lodtensor_printer.h | 7 +++++ .../fluid/platform/lodtensor_printer_test.cc | 2 +- paddle/fluid/platform/monitor.cc | 1 - paddle/fluid/platform/monitor.h | 1 + paddle/fluid/platform/place_test.cc | 2 +- paddle/fluid/platform/profiler.cc | 13 --------- paddle/fluid/platform/profiler.h | 1 + paddle/fluid/platform/profiler_test.cc | 5 ++-- 
paddle/fluid/platform/stream/cuda_stream.h | 1 + paddle/fluid/platform/timer.h | 1 + paddle/fluid/string/piece_test.cc | 2 -- paddle/fluid/string/pretty_log.h | 1 + paddle/fluid/string/string_helper.cc | 4 +-- paddle/fluid/string/string_helper.h | 1 + paddle/testing/paddle_gtest_main.cc | 3 -- 616 files changed, 3308 insertions(+), 766 deletions(-) diff --git a/paddle/fluid/framework/attribute.cc b/paddle/fluid/framework/attribute.cc index 9ca3fe31a33..7460686c1a3 100644 --- a/paddle/fluid/framework/attribute.cc +++ b/paddle/fluid/framework/attribute.cc @@ -14,8 +14,6 @@ limitations under the License. */ #include "paddle/fluid/framework/attribute.h" -#include - namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/block_desc.h b/paddle/fluid/framework/block_desc.h index 5c6e4215162..8c8fcadb05b 100644 --- a/paddle/fluid/framework/block_desc.h +++ b/paddle/fluid/framework/block_desc.h @@ -30,6 +30,8 @@ namespace paddle { namespace framework { class ProgramDesc; +class OpDesc; +class VarDesc; // Each Protobuf Message, we provide a XXXBind class. In that class, we optimize // read/write speed. Only when we want the protobuf message, the local changes diff --git a/paddle/fluid/framework/c/c_api.cc b/paddle/fluid/framework/c/c_api.cc index 0dd2768ccb9..48181dac662 100644 --- a/paddle/fluid/framework/c/c_api.cc +++ b/paddle/fluid/framework/c/c_api.cc @@ -12,17 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include -#include -#include -#include - -#include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/c/c_api.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/platform/init.h" extern "C" { diff --git a/paddle/fluid/framework/c/c_api.h b/paddle/fluid/framework/c/c_api.h index 04dbfbebe5d..a9ec402f381 100644 --- a/paddle/fluid/framework/c/c_api.h +++ b/paddle/fluid/framework/c/c_api.h @@ -24,6 +24,15 @@ limitations under the License. */ #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/platform/device_context.h" +namespace paddle { +namespace framework { +class OpInfoMap; +} // namespace framework +namespace platform { +class DeviceContextPool; +} // namespace platform +} // namespace paddle + #ifdef __cplusplus extern "C" { #endif diff --git a/paddle/fluid/framework/channel.h b/paddle/fluid/framework/channel.h index 64a645bf8b2..503f1513aad 100644 --- a/paddle/fluid/framework/channel.h +++ b/paddle/fluid/framework/channel.h @@ -277,7 +277,7 @@ class ChannelObject { size_t finished = 0; while (finished < n && WaitForWrite(lock)) { size_t m = - std::min(n - finished, capacity_ + reading_count_ - data_.size()); + (std::min)(n - finished, capacity_ + reading_count_ - data_.size()); for (size_t i = 0; i < m; i++) { data_.push_back(std::move(p[finished++])); } diff --git a/paddle/fluid/framework/copy_same_tensor_test.cc b/paddle/fluid/framework/copy_same_tensor_test.cc index 9350c387a6e..5b89166e2f4 100644 --- a/paddle/fluid/framework/copy_same_tensor_test.cc +++ b/paddle/fluid/framework/copy_same_tensor_test.cc @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include #include + #include "gflags/gflags.h" #include "gtest/gtest.h" #include "paddle/fluid/framework/tensor.h" diff --git a/paddle/fluid/framework/data_device_transform.h b/paddle/fluid/framework/data_device_transform.h index 8ff97646cfc..60b52a5e706 100644 --- a/paddle/fluid/framework/data_device_transform.h +++ b/paddle/fluid/framework/data_device_transform.h @@ -21,6 +21,8 @@ limitations under the License. */ namespace paddle { namespace framework { +class Tensor; + void TransDataDevice(const Tensor& in, const platform::Place& dst_place, Tensor* out); diff --git a/paddle/fluid/framework/data_feed.h b/paddle/fluid/framework/data_feed.h index b48d152fe35..da156bfc5c7 100644 --- a/paddle/fluid/framework/data_feed.h +++ b/paddle/fluid/framework/data_feed.h @@ -41,6 +41,15 @@ limitations under the License. */ #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/string/string_helper.h" +namespace paddle { +namespace framework { +class DataFeedDesc; +class LoDTensor; +class Scope; +class Variable; +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { @@ -418,6 +427,7 @@ class MultiSlotType { std::string DebugString() { std::stringstream ss; + ss << "\ntype: " << type_ << "\n"; ss << "offset: "; ss << "["; diff --git a/paddle/fluid/framework/data_feed_factory.cc b/paddle/fluid/framework/data_feed_factory.cc index 1d8aec76240..048d539f9b9 100644 --- a/paddle/fluid/framework/data_feed_factory.cc +++ b/paddle/fluid/framework/data_feed_factory.cc @@ -17,10 +17,10 @@ limitations under the License. 
*/ #include #include -#include "paddle/fluid/framework/data_feed.h" - namespace paddle { namespace framework { +class DataFeed; + typedef std::shared_ptr (*Createdata_feedFunction)(); typedef std::unordered_map data_feedMap; data_feedMap g_data_feed_map; diff --git a/paddle/fluid/framework/data_feed_factory.h b/paddle/fluid/framework/data_feed_factory.h index 13678edb0b8..49381a98706 100644 --- a/paddle/fluid/framework/data_feed_factory.h +++ b/paddle/fluid/framework/data_feed_factory.h @@ -16,10 +16,13 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/data_feed.h" namespace paddle { namespace framework { +class DataFeed; + class DataFeedFactory { public: static std::string DataFeedTypeList(); diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc index f757e244e38..108cd9ac6d1 100644 --- a/paddle/fluid/framework/data_layout_transform.cc +++ b/paddle/fluid/framework/data_layout_transform.cc @@ -13,8 +13,8 @@ // limitations under the License. 
#include "paddle/fluid/framework/data_layout_transform.h" + #include -#include #include "paddle/fluid/operators/math/math_function.h" #ifdef PADDLE_WITH_MKLDNN diff --git a/paddle/fluid/framework/data_layout_transform.h b/paddle/fluid/framework/data_layout_transform.h index b92c47c2eb0..238f2d2e679 100644 --- a/paddle/fluid/framework/data_layout_transform.h +++ b/paddle/fluid/framework/data_layout_transform.h @@ -17,10 +17,18 @@ #include #include #include + #include "paddle/fluid/framework/op_kernel_type.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/variable.h" +namespace paddle { +namespace framework { +class OpKernelType; +class Tensor; +} // namespace framework +} // namespace paddle + #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif diff --git a/paddle/fluid/framework/data_layout_transform_test.cc b/paddle/fluid/framework/data_layout_transform_test.cc index 8dfad23db65..20443e9a3dc 100644 --- a/paddle/fluid/framework/data_layout_transform_test.cc +++ b/paddle/fluid/framework/data_layout_transform_test.cc @@ -15,7 +15,6 @@ #include "paddle/fluid/framework/data_layout_transform.h" #include "gtest/gtest.h" -#include "paddle/fluid/platform/device_context.h" TEST(DataTransform, DataLayoutFunction) { auto place = paddle::platform::CPUPlace(); diff --git a/paddle/fluid/framework/data_transform.cc b/paddle/fluid/framework/data_transform.cc index f54311eebfa..3a40de6988f 100644 --- a/paddle/fluid/framework/data_transform.cc +++ b/paddle/fluid/framework/data_transform.cc @@ -18,8 +18,13 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/data_type_transform.h" +namespace paddle { +namespace framework { +class Variable; +} // namespace framework +} // namespace paddle + #ifdef PADDLE_WITH_MKLDNN -#include #include "paddle/fluid/platform/mkldnn_helper.h" #endif diff --git a/paddle/fluid/framework/data_transform.h b/paddle/fluid/framework/data_transform.h index ef2271d530d..2bbdac52ee4 100644 --- a/paddle/fluid/framework/data_transform.h +++ b/paddle/fluid/framework/data_transform.h @@ -30,6 +30,10 @@ limitations under the License. */ namespace paddle { namespace framework { +class OpKernelType; +class Tensor; +class Variable; + void TransformData(const OpKernelType &expected_kernel_type, const OpKernelType &kernel_type_for_var, const Tensor &input_tensor, Tensor *out); diff --git a/paddle/fluid/framework/data_type.cc b/paddle/fluid/framework/data_type.cc index 8188d5cde1b..e4be866dca1 100644 --- a/paddle/fluid/framework/data_type.cc +++ b/paddle/fluid/framework/data_type.cc @@ -13,7 +13,6 @@ // limitations under the License. #include "paddle/fluid/framework/data_type.h" -#include #include #include diff --git a/paddle/fluid/framework/data_type.h b/paddle/fluid/framework/data_type.h index 720e422e114..4477a9cac09 100644 --- a/paddle/fluid/framework/data_type.h +++ b/paddle/fluid/framework/data_type.h @@ -15,12 +15,19 @@ limitations under the License. 
*/ #pragma once #include #include -#include "paddle/fluid/framework/framework.pb.h" -#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/platform/bfloat16.h" +#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/float16.h" +namespace paddle { +namespace platform { +struct bfloat16; +struct float16; +} // namespace platform +} // namespace paddle + namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/data_type_test.cc b/paddle/fluid/framework/data_type_test.cc index 331596da33a..5599edcd222 100644 --- a/paddle/fluid/framework/data_type_test.cc +++ b/paddle/fluid/framework/data_type_test.cc @@ -14,9 +14,17 @@ #include "paddle/fluid/framework/data_type.h" #include + #include "gtest/gtest.h" #include "paddle/fluid/framework/tensor.h" +namespace paddle { +namespace platform { +struct bfloat16; +struct float16; +} // namespace platform +} // namespace paddle + TEST(DataType, float16) { using paddle::framework::Tensor; using paddle::platform::CPUPlace; diff --git a/paddle/fluid/framework/data_type_transform.h b/paddle/fluid/framework/data_type_transform.h index 1c281b03ed6..b42b2f594aa 100644 --- a/paddle/fluid/framework/data_type_transform.h +++ b/paddle/fluid/framework/data_type_transform.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/op_kernel_type.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/variable.h" @@ -23,6 +24,9 @@ limitations under the License. 
*/ namespace paddle { namespace framework { +class OpKernelType; +class Tensor; + using KernelTypePair = std::pair; void TransDataType(const OpKernelType& kernel_type_for_var, diff --git a/paddle/fluid/framework/dataset_factory.cc b/paddle/fluid/framework/dataset_factory.cc index 3a28c101d48..cdb513f70ad 100644 --- a/paddle/fluid/framework/dataset_factory.cc +++ b/paddle/fluid/framework/dataset_factory.cc @@ -13,12 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/dataset_factory.h" -#include #include #include -#include "paddle/fluid/framework/data_set.h" - namespace paddle { namespace framework { typedef std::unique_ptr (*CreateDatasetFunction)(); diff --git a/paddle/fluid/framework/dataset_factory.h b/paddle/fluid/framework/dataset_factory.h index d4a36cec22f..425c488daa8 100644 --- a/paddle/fluid/framework/dataset_factory.h +++ b/paddle/fluid/framework/dataset_factory.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/data_set.h" namespace paddle { diff --git a/paddle/fluid/framework/ddim.h b/paddle/fluid/framework/ddim.h index 29c4732f991..e69fb4e7619 100644 --- a/paddle/fluid/framework/ddim.h +++ b/paddle/fluid/framework/ddim.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/dim.h" namespace paddle { diff --git a/paddle/fluid/framework/ddim_test.cc b/paddle/fluid/framework/ddim_test.cc index b7b42fa019f..e89f77ae496 100644 --- a/paddle/fluid/framework/ddim_test.cc +++ b/paddle/fluid/framework/ddim_test.cc @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #include -#include #include "gtest/gtest.h" #include "paddle/fluid/framework/ddim.h" diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.h b/paddle/fluid/framework/details/all_reduce_op_handle.h index 36f5d3adfad..e0064ec2642 100644 --- a/paddle/fluid/framework/details/all_reduce_op_handle.h +++ b/paddle/fluid/framework/details/all_reduce_op_handle.h @@ -20,6 +20,17 @@ #include "paddle/fluid/framework/details/op_handle_base.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" + +namespace paddle { +namespace framework { +namespace ir { +class Node; +} // namespace ir +} // namespace framework +namespace platform { +class NCCLCommunicator; +} // namespace platform +} // namespace paddle #if defined(PADDLE_WITH_NCCL) #include "paddle/fluid/framework/details/nccl_op_handle.h" #include "paddle/fluid/platform/nccl_helper.h" diff --git a/paddle/fluid/framework/details/broadcast_op_handle.h b/paddle/fluid/framework/details/broadcast_op_handle.h index 588248d6454..1412e2cd9db 100644 --- a/paddle/fluid/framework/details/broadcast_op_handle.h +++ b/paddle/fluid/framework/details/broadcast_op_handle.h @@ -24,6 +24,20 @@ #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/platform/device_context.h" +namespace paddle { +namespace framework { +namespace details { +struct VarHandle; +} // namespace details +namespace ir { +class Node; +} // namespace ir +} // namespace framework +namespace platform { +struct NCCLContextMap; +} // namespace platform +} // namespace paddle + #if defined(PADDLE_WITH_NCCL) #include "paddle/fluid/platform/nccl_helper.h" #endif diff --git a/paddle/fluid/framework/details/broadcast_op_handle_test.h b/paddle/fluid/framework/details/broadcast_op_handle_test.h index 6d14c7e4e7b..e455879a68f 100644 --- a/paddle/fluid/framework/details/broadcast_op_handle_test.h +++ b/paddle/fluid/framework/details/broadcast_op_handle_test.h @@ -21,13 +21,15 @@ #include "gtest/gtest.h" #include 
"paddle/fluid/framework/details/broadcast_op_handle.h" - #include "paddle/fluid/platform/device_context.h" namespace paddle { namespace framework { namespace details { +struct DummyVarHandle; +struct VarHandle; + namespace f = paddle::framework; namespace p = paddle::platform; diff --git a/paddle/fluid/framework/details/build_strategy.h b/paddle/fluid/framework/details/build_strategy.h index 01d496d4ea7..87b27eaa440 100644 --- a/paddle/fluid/framework/details/build_strategy.h +++ b/paddle/fluid/framework/details/build_strategy.h @@ -27,6 +27,18 @@ #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" +namespace paddle { +namespace framework { +namespace ir { +class Graph; +class PassBuilder; +} // namespace ir +} // namespace framework +namespace platform { +class NCCLCommunicator; +} // namespace platform +} // namespace paddle + #if defined(PADDLE_WITH_NCCL) #include "paddle/fluid/platform/nccl_helper.h" #endif diff --git a/paddle/fluid/framework/details/computation_op_handle.cc b/paddle/fluid/framework/details/computation_op_handle.cc index 0b653e57f6d..2256b826ed5 100644 --- a/paddle/fluid/framework/details/computation_op_handle.cc +++ b/paddle/fluid/framework/details/computation_op_handle.cc @@ -19,6 +19,8 @@ namespace paddle { namespace framework { namespace details { +struct VarHandleBase; + ComputationOpHandle::ComputationOpHandle(ir::Node *node, Scope *scope, platform::Place place, size_t scope_idx) diff --git a/paddle/fluid/framework/details/computation_op_handle.h b/paddle/fluid/framework/details/computation_op_handle.h index 41c51b95800..3c219ee27d3 100644 --- a/paddle/fluid/framework/details/computation_op_handle.h +++ b/paddle/fluid/framework/details/computation_op_handle.h @@ -24,9 +24,21 @@ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/platform/device_context.h" +namespace paddle { +namespace framework { +class OperatorBase; +class Scope; +namespace ir { +class Node; +} // namespace ir +} // 
namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace details { +struct VarHandleBase; + class ComputationOpHandle : public OpHandleBase { public: ComputationOpHandle(ir::Node *node, Scope *scope, platform::Place place, diff --git a/paddle/fluid/framework/details/eager_deletion_op_handle.h b/paddle/fluid/framework/details/eager_deletion_op_handle.h index a048799a280..8edce6782de 100644 --- a/paddle/fluid/framework/details/eager_deletion_op_handle.h +++ b/paddle/fluid/framework/details/eager_deletion_op_handle.h @@ -19,12 +19,23 @@ #include #include #include + #include "paddle/fluid/framework/details/op_handle_base.h" #include "paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.h" +namespace paddle { +namespace platform { +class CUDADeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace framework { class Scope; +class GarbageCollector; +namespace ir { +class Node; +} // namespace ir namespace ir { class MemOptVarInfo; diff --git a/paddle/fluid/framework/details/exception_holder_test.cc b/paddle/fluid/framework/details/exception_holder_test.cc index c20563a0860..3db358667eb 100644 --- a/paddle/fluid/framework/details/exception_holder_test.cc +++ b/paddle/fluid/framework/details/exception_holder_test.cc @@ -13,8 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/exception_holder.h" -#include -#include + #include "gtest/gtest.h" #include "paddle/fluid/memory/allocation/allocator.h" diff --git a/paddle/fluid/framework/details/fetch_async_op_handle.cc b/paddle/fluid/framework/details/fetch_async_op_handle.cc index 6aae523365e..09aedafc6bb 100644 --- a/paddle/fluid/framework/details/fetch_async_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_async_op_handle.cc @@ -15,9 +15,14 @@ #include "paddle/fluid/framework/details/fetch_async_op_handle.h" #include #include -#include #include "paddle/fluid/platform/profiler.h" +namespace paddle { +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace framework { namespace details { diff --git a/paddle/fluid/framework/details/fetch_async_op_handle.h b/paddle/fluid/framework/details/fetch_async_op_handle.h index 691a3286c27..ff9271942da 100644 --- a/paddle/fluid/framework/details/fetch_async_op_handle.h +++ b/paddle/fluid/framework/details/fetch_async_op_handle.h @@ -22,6 +22,18 @@ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/platform/device_context.h" +namespace paddle { +namespace framework { +class LoDTensor; +namespace ir { +class Node; +} // namespace ir +} // namespace framework +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace framework { namespace details { diff --git a/paddle/fluid/framework/details/fetch_barrier_op_handle.cc b/paddle/fluid/framework/details/fetch_barrier_op_handle.cc index 127183a32e9..fc836ade786 100644 --- a/paddle/fluid/framework/details/fetch_barrier_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_barrier_op_handle.cc @@ -19,6 +19,8 @@ namespace paddle { namespace framework { namespace details { +struct VarHandleBase; + FetchBarrierOpHandle::FetchBarrierOpHandle( ir::Node *node, const std::vector &local_scopes, const std::vector &places) diff 
--git a/paddle/fluid/framework/details/fetch_barrier_op_handle.h b/paddle/fluid/framework/details/fetch_barrier_op_handle.h index d1f7e08b28e..7ce790f38e8 100644 --- a/paddle/fluid/framework/details/fetch_barrier_op_handle.h +++ b/paddle/fluid/framework/details/fetch_barrier_op_handle.h @@ -24,6 +24,15 @@ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/platform/device_context.h" +namespace paddle { +namespace framework { +class Scope; +namespace ir { +class Node; +} // namespace ir +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace details { @@ -32,6 +41,8 @@ namespace details { // all places if there are multiple places, must init with // multiple dev_ctxes_ !!!! +struct VarHandleBase; + struct FetchBarrierOpHandle : public OpHandleBase { public: FetchBarrierOpHandle(ir::Node *node, const std::vector &local_scopes, diff --git a/paddle/fluid/framework/details/fetch_op_handle.h b/paddle/fluid/framework/details/fetch_op_handle.h index 31ffd1211d2..41deeb0af27 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.h +++ b/paddle/fluid/framework/details/fetch_op_handle.h @@ -22,6 +22,17 @@ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/platform/device_context.h" +namespace paddle { +namespace framework { +namespace ir { +class Node; +} // namespace ir +} // namespace framework +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace framework { namespace details { diff --git a/paddle/fluid/framework/details/fused_all_reduce_op_handle.h b/paddle/fluid/framework/details/fused_all_reduce_op_handle.h index 16c13ac1c03..9bed792a42f 100644 --- a/paddle/fluid/framework/details/fused_all_reduce_op_handle.h +++ b/paddle/fluid/framework/details/fused_all_reduce_op_handle.h @@ -17,10 +17,22 @@ #include #include #include + #include "paddle/fluid/framework/details/all_reduce_op_handle.h" #include 
"paddle/fluid/framework/details/op_handle_base.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" + +namespace paddle { +namespace framework { +namespace ir { +class Node; +} // namespace ir +} // namespace framework +namespace platform { +class NCCLCommunicator; +} // namespace platform +} // namespace paddle #if defined(PADDLE_WITH_NCCL) #include "paddle/fluid/framework/details/nccl_op_handle.h" #include "paddle/fluid/platform/nccl_helper.h" diff --git a/paddle/fluid/framework/details/fused_broadcast_op_handle.h b/paddle/fluid/framework/details/fused_broadcast_op_handle.h index 8a59d2bfa9a..8fd3ec56d18 100644 --- a/paddle/fluid/framework/details/fused_broadcast_op_handle.h +++ b/paddle/fluid/framework/details/fused_broadcast_op_handle.h @@ -25,6 +25,17 @@ #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/platform/device_context.h" +namespace paddle { +namespace framework { +namespace ir { +class Node; +} // namespace ir +} // namespace framework +namespace platform { +struct NCCLContextMap; +} // namespace platform +} // namespace paddle + #if defined(PADDLE_WITH_NCCL) #include "paddle/fluid/platform/nccl_helper.h" #endif diff --git a/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc b/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc index cbded074f20..761a5b5a30a 100644 --- a/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc +++ b/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc @@ -17,11 +17,20 @@ #include #include "gtest/gtest.h" #include "paddle/fluid/framework/details/broadcast_op_handle_test.h" +#include "paddle/fluid/framework/details/op_handle_base.h" + +namespace paddle { +namespace framework { +class Scope; +} // namespace framework +} // namespace paddle namespace paddle { namespace framework { namespace details { +struct VarHandle; + struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle { std::vector out_varnames_; 
std::vector> nodes_; diff --git a/paddle/fluid/framework/details/gather_op_handle.h b/paddle/fluid/framework/details/gather_op_handle.h index ac87b246b50..9cbd94cd6b8 100644 --- a/paddle/fluid/framework/details/gather_op_handle.h +++ b/paddle/fluid/framework/details/gather_op_handle.h @@ -24,6 +24,14 @@ #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/platform/device_context.h" +namespace paddle { +namespace framework { +namespace ir { +class Node; +} // namespace ir +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace details { diff --git a/paddle/fluid/framework/details/gather_op_handle_test.cc b/paddle/fluid/framework/details/gather_op_handle_test.cc index 5d8562e7046..f3fcc1a436d 100644 --- a/paddle/fluid/framework/details/gather_op_handle_test.cc +++ b/paddle/fluid/framework/details/gather_op_handle_test.cc @@ -17,11 +17,11 @@ #include #include "gtest/gtest.h" -#include "paddle/fluid/platform/device_context.h" - namespace paddle { namespace framework { namespace details { +struct DummyVarHandle; + namespace f = paddle::framework; namespace p = paddle::platform; diff --git a/paddle/fluid/framework/details/multi_devices_helper.h b/paddle/fluid/framework/details/multi_devices_helper.h index 21e781877a4..c3a18433cf8 100644 --- a/paddle/fluid/framework/details/multi_devices_helper.h +++ b/paddle/fluid/framework/details/multi_devices_helper.h @@ -20,16 +20,21 @@ #include #include #include + #include "paddle/fluid/framework/details/op_handle_base.h" #include "paddle/fluid/framework/details/var_handle.h" - +#include "paddle/fluid/framework/ir/graph.h" +#include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/platform/place.h" -#include "paddle/fluid/framework/ir/graph.h" -#include "paddle/fluid/framework/ir/pass.h" +namespace paddle { +namespace 
framework { +class OpDesc; +} // namespace framework +} // namespace paddle namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.h b/paddle/fluid/framework/details/nan_inf_utils_detail.h index 15d00932f1c..b4459e5a7c1 100644 --- a/paddle/fluid/framework/details/nan_inf_utils_detail.h +++ b/paddle/fluid/framework/details/nan_inf_utils_detail.h @@ -19,6 +19,12 @@ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/platform/place.h" +namespace paddle { +namespace framework { +class Tensor; +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace details { diff --git a/paddle/fluid/framework/details/op_handle_base.h b/paddle/fluid/framework/details/op_handle_base.h index 097f54d5d58..eb3d9c32ffc 100644 --- a/paddle/fluid/framework/details/op_handle_base.h +++ b/paddle/fluid/framework/details/op_handle_base.h @@ -24,10 +24,22 @@ #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/macros.h" +namespace paddle { +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace framework { class Scope; +namespace details { +struct VarHandleBase; +} // namespace details +namespace ir { +class Node; +} // namespace ir namespace details { diff --git a/paddle/fluid/framework/details/reduce_op_handle.h b/paddle/fluid/framework/details/reduce_op_handle.h index 8b92bdef475..e76a48d207d 100644 --- a/paddle/fluid/framework/details/reduce_op_handle.h +++ b/paddle/fluid/framework/details/reduce_op_handle.h @@ -24,6 +24,21 @@ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/platform/device_context.h" + +namespace paddle { +namespace framework { +class SelectedRows; +namespace details { +struct VarHandle; +} // namespace details +namespace ir { +class Node; +} // namespace ir +} // namespace framework +namespace platform { 
+struct NCCLContextMap; +} // namespace platform +} // namespace paddle #if defined(PADDLE_WITH_NCCL) #include "paddle/fluid/platform/nccl_helper.h" #endif diff --git a/paddle/fluid/framework/details/rpc_op_handle.h b/paddle/fluid/framework/details/rpc_op_handle.h index d86d33dd676..909f565f2c0 100644 --- a/paddle/fluid/framework/details/rpc_op_handle.h +++ b/paddle/fluid/framework/details/rpc_op_handle.h @@ -24,6 +24,16 @@ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/scope.h" +namespace paddle { +namespace framework { +class OpDesc; +class Scope; +namespace ir { +class Node; +} // namespace ir +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace details { diff --git a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc index 6e2f2327abd..287667d5ee9 100644 --- a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc +++ b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc @@ -13,9 +13,17 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/scale_loss_grad_op_handle.h" + #include + #include "paddle/fluid/platform/profiler.h" +namespace paddle { +namespace framework { +class Tensor; +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace details { diff --git a/paddle/fluid/framework/details/scale_loss_grad_op_handle.h b/paddle/fluid/framework/details/scale_loss_grad_op_handle.h index d4f28dbe2b2..02e5aa88443 100644 --- a/paddle/fluid/framework/details/scale_loss_grad_op_handle.h +++ b/paddle/fluid/framework/details/scale_loss_grad_op_handle.h @@ -21,6 +21,18 @@ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" +namespace paddle { +namespace framework { +class Scope; +namespace ir { +class Node; +} // namespace ir +} // namespace framework +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace framework { namespace details { diff --git a/paddle/fluid/framework/details/scope_buffered_monitor.h b/paddle/fluid/framework/details/scope_buffered_monitor.h index 1246c35af6a..3a94534eff4 100644 --- a/paddle/fluid/framework/details/scope_buffered_monitor.h +++ b/paddle/fluid/framework/details/scope_buffered_monitor.h @@ -17,7 +17,9 @@ #include #include #include + #include "paddle/fluid/framework/scope.h" + namespace paddle { namespace framework { namespace details { diff --git a/paddle/fluid/framework/details/share_tensor_buffer_functor.cc b/paddle/fluid/framework/details/share_tensor_buffer_functor.cc index 5fbaf3cbfe0..bf93d8f85b1 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_functor.cc +++ b/paddle/fluid/framework/details/share_tensor_buffer_functor.cc @@ -22,6 +22,17 @@ #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/platform/enforce.h" +namespace paddle { +namespace framework { +class Scope; +class Tensor; +class Variable; +namespace ir { +class MemOptVarInfo; +} // 
namespace ir +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace details { diff --git a/paddle/fluid/framework/details/share_tensor_buffer_functor.h b/paddle/fluid/framework/details/share_tensor_buffer_functor.h index be49d1c432b..0db69d07bf6 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_functor.h +++ b/paddle/fluid/framework/details/share_tensor_buffer_functor.h @@ -25,6 +25,15 @@ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/variable.h" +namespace paddle { +namespace framework { +class Scope; +namespace ir { +class MemOptVarInfo; +} // namespace ir +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace details { diff --git a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc index be3f5515a97..3d53bb62855 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc +++ b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc @@ -23,10 +23,20 @@ #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/platform/enforce.h" +namespace paddle { +namespace framework { +namespace ir { +class MemOptVarInfo; +} // namespace ir +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace details { +class ComputationOpHandle; + ComputationOpHandle *GetUniquePendingComputationOpHandle( ShareTensorBufferOpHandle *share_tensor_op) { ComputationOpHandle *result_op = nullptr; diff --git a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.h b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.h index a02c346485e..d14cbc31d82 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_op_handle.h +++ b/paddle/fluid/framework/details/share_tensor_buffer_op_handle.h @@ -22,10 +22,22 @@ #include "paddle/fluid/framework/details/op_handle_base.h" #include 
"paddle/fluid/framework/details/share_tensor_buffer_functor.h" +namespace paddle { +namespace framework { +class Scope; +namespace ir { +class MemOptVarInfo; +class Node; +} // namespace ir +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace details { +class ComputationOpHandle; + class ShareTensorBufferOpHandle : public OpHandleBase { public: ShareTensorBufferOpHandle( diff --git a/paddle/fluid/framework/details/sparse_all_reduce_op_handle.h b/paddle/fluid/framework/details/sparse_all_reduce_op_handle.h index b24b457d21d..8bfea0f1ae8 100644 --- a/paddle/fluid/framework/details/sparse_all_reduce_op_handle.h +++ b/paddle/fluid/framework/details/sparse_all_reduce_op_handle.h @@ -23,6 +23,17 @@ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/platform/nccl_helper.h" +namespace paddle { +namespace framework { +namespace ir { +class Node; +} // namespace ir +} // namespace framework +namespace platform { +class NCCLCommunicator; +} // namespace platform +} // namespace paddle + namespace paddle { namespace framework { namespace details { diff --git a/paddle/fluid/framework/details/var_handle.h b/paddle/fluid/framework/details/var_handle.h index bb38424d3ae..a35ac0bd732 100644 --- a/paddle/fluid/framework/details/var_handle.h +++ b/paddle/fluid/framework/details/var_handle.h @@ -24,6 +24,14 @@ #include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/place.h" +namespace paddle { +namespace framework { +namespace ir { +class Node; +} // namespace ir +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace details { diff --git a/paddle/fluid/framework/details/variable_visitor.cc b/paddle/fluid/framework/details/variable_visitor.cc index fba0c1bf463..71e5dd28ede 100644 --- a/paddle/fluid/framework/details/variable_visitor.cc +++ b/paddle/fluid/framework/details/variable_visitor.cc @@ -15,6 +15,14 @@ #include 
"paddle/fluid/framework/details/variable_visitor.h" #include "paddle/fluid/framework/selected_rows.h" + +namespace paddle { +namespace framework { +class LoDTensor; +class Variable; +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace details { diff --git a/paddle/fluid/framework/details/variable_visitor.h b/paddle/fluid/framework/details/variable_visitor.h index ca9a19bdcf1..a882d5120bc 100644 --- a/paddle/fluid/framework/details/variable_visitor.h +++ b/paddle/fluid/framework/details/variable_visitor.h @@ -17,6 +17,13 @@ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/variable.h" +namespace paddle { +namespace framework { +class Tensor; +class Variable; +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace details { diff --git a/paddle/fluid/framework/device_worker.cc b/paddle/fluid/framework/device_worker.cc index aeec6161714..fbaae5a21c2 100644 --- a/paddle/fluid/framework/device_worker.cc +++ b/paddle/fluid/framework/device_worker.cc @@ -13,11 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/device_worker.h" -#include "xxhash.h" // NOLINT namespace paddle { namespace framework { +class LoDTensor; +class Scope; + void DeviceWorker::SetRootScope(Scope* root_scope) { root_scope_ = root_scope; } void DeviceWorker::SetDataFeed(DataFeed* data_feed) { diff --git a/paddle/fluid/framework/device_worker.h b/paddle/fluid/framework/device_worker.h index efe6fa1b2da..ee2ef9a0c3d 100644 --- a/paddle/fluid/framework/device_worker.h +++ b/paddle/fluid/framework/device_worker.h @@ -39,6 +39,18 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/port.h" #include "paddle/fluid/platform/timer.h" +namespace paddle { +namespace framework { +class LoDTensor; +class ProgramDesc; +class Scope; +class Tensor; +} // namespace framework +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle + #if defined(PADDLE_WITH_NCCL) #include "paddle/fluid/platform/nccl_helper.h" #endif diff --git a/paddle/fluid/framework/device_worker_factory.cc b/paddle/fluid/framework/device_worker_factory.cc index 67be8db6e80..3b60cb65e34 100644 --- a/paddle/fluid/framework/device_worker_factory.cc +++ b/paddle/fluid/framework/device_worker_factory.cc @@ -20,6 +20,8 @@ limitations under the License. */ namespace paddle { namespace framework { +class DeviceWorker; + typedef std::shared_ptr (*Createdevice_workerFunction)(); typedef std::unordered_map device_workerMap; diff --git a/paddle/fluid/framework/device_worker_factory.h b/paddle/fluid/framework/device_worker_factory.h index 9d0613385e7..6a31c3ea7a4 100644 --- a/paddle/fluid/framework/device_worker_factory.h +++ b/paddle/fluid/framework/device_worker_factory.h @@ -16,11 +16,14 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/device_worker.h" namespace paddle { namespace framework { +class DeviceWorker; + class DeviceWorkerFactory { public: static std::string DeviceWorkerTypeList(); diff --git a/paddle/fluid/framework/device_worker_test.cc b/paddle/fluid/framework/device_worker_test.cc index b488e4cfe7a..461d329a371 100644 --- a/paddle/fluid/framework/device_worker_test.cc +++ b/paddle/fluid/framework/device_worker_test.cc @@ -13,9 +13,10 @@ // limitations under the License. 
#include "paddle/fluid/framework/device_worker.h" + #include + #include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/trainer.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/dlpack_tensor.cc b/paddle/fluid/framework/dlpack_tensor.cc index 915589b3242..ac1e39ad2c1 100644 --- a/paddle/fluid/framework/dlpack_tensor.cc +++ b/paddle/fluid/framework/dlpack_tensor.cc @@ -11,10 +11,17 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/framework/dlpack_tensor.h" #include - #include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/framework/dlpack_tensor.h" + +namespace paddle { +namespace platform { +struct bfloat16; +struct float16; +} // namespace platform +} // namespace paddle + namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/dlpack_tensor.h b/paddle/fluid/framework/dlpack_tensor.h index 5346ba62894..e342523718b 100644 --- a/paddle/fluid/framework/dlpack_tensor.h +++ b/paddle/fluid/framework/dlpack_tensor.h @@ -15,11 +15,14 @@ #pragma once #include + #include "paddle/fluid/framework/tensor.h" namespace paddle { namespace framework { +class Tensor; + class DLPackTensor { public: using LaneType = decltype(::DLTensor::dtype.lanes); // uint16_t diff --git a/paddle/fluid/framework/dlpack_tensor_test.cc b/paddle/fluid/framework/dlpack_tensor_test.cc index 4dead063b47..4a1f151f69b 100644 --- a/paddle/fluid/framework/dlpack_tensor_test.cc +++ b/paddle/fluid/framework/dlpack_tensor_test.cc @@ -17,6 +17,12 @@ #include #include +namespace paddle { +namespace platform { +struct float16; +} // namespace platform +} // namespace paddle + namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/downpour_worker.cc b/paddle/fluid/framework/downpour_worker.cc index 1c64bf1d3f7..00f721701a4 100644 --- 
a/paddle/fluid/framework/downpour_worker.cc +++ b/paddle/fluid/framework/downpour_worker.cc @@ -13,10 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/device_worker.h" -#include "paddle/fluid/framework/device_worker_factory.h" -#include "paddle/fluid/framework/fleet/fleet_wrapper.h" #include "paddle/fluid/platform/cpu_helper.h" -#include "paddle/fluid/string/string_helper.h" + +namespace paddle { +namespace framework { +class LoDTensor; +class Variable; +} // namespace framework +} // namespace paddle #if defined _WIN32 || defined __APPLE__ #else diff --git a/paddle/fluid/framework/downpour_worker_opt.cc b/paddle/fluid/framework/downpour_worker_opt.cc index b40a00ef9cb..afe6ddfa3d9 100644 --- a/paddle/fluid/framework/downpour_worker_opt.cc +++ b/paddle/fluid/framework/downpour_worker_opt.cc @@ -12,18 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include -#include -#include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/device_worker.h" -#include "paddle/fluid/framework/device_worker_factory.h" -#include "paddle/fluid/framework/fleet/fleet_wrapper.h" #include "paddle/fluid/platform/cpu_helper.h" -#include "paddle/fluid/platform/lodtensor_printer.h" namespace paddle { namespace framework { +class OpDesc; +class OperatorBase; +class ProgramDesc; + bool HasDependentOutput(const OpDesc& op_desc, const std::unordered_set& dependent_vars) { for (auto& var : op_desc.Outputs()) { diff --git a/paddle/fluid/framework/eigen.h b/paddle/fluid/framework/eigen.h index 0e3edfb95cb..a6abda8a83b 100644 --- a/paddle/fluid/framework/eigen.h +++ b/paddle/fluid/framework/eigen.h @@ -14,6 +14,8 @@ limitations under the License. 
*/ #pragma once +#include + #include "paddle/fluid/framework/tensor.h" #include "unsupported/Eigen/CXX11/Tensor" diff --git a/paddle/fluid/framework/executor.h b/paddle/fluid/framework/executor.h index fa6a65d5892..7593b60abff 100644 --- a/paddle/fluid/framework/executor.h +++ b/paddle/fluid/framework/executor.h @@ -19,6 +19,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/data_set.h" #include "paddle/fluid/framework/executor_gc_helper.h" #include "paddle/fluid/framework/garbage_collector.h" @@ -32,6 +33,11 @@ limitations under the License. */ namespace paddle { namespace framework { +class Dataset; +class ProgramDesc; +class Scope; +class TrainerBase; + struct ExecutorPrepareContext { ExecutorPrepareContext(const framework::ProgramDesc& prog, size_t block_id); diff --git a/paddle/fluid/framework/executor_gc_helper.cc b/paddle/fluid/framework/executor_gc_helper.cc index 706248229bc..c80eedb1b86 100644 --- a/paddle/fluid/framework/executor_gc_helper.cc +++ b/paddle/fluid/framework/executor_gc_helper.cc @@ -13,16 +13,19 @@ // limitations under the License. 
#include "paddle/fluid/framework/executor_gc_helper.h" + #include #include -#include #include #include -#include + #include "glog/logging.h" -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/lod_tensor_array.h" -#include "paddle/fluid/framework/selected_rows.h" +#include "paddle/fluid/framework/block_desc.h" +#include "paddle/fluid/framework/framework.pb.h" +#include "paddle/fluid/framework/no_need_buffer_vars_inference.h" +#include "paddle/fluid/framework/op_info.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/var_desc.h" #include "paddle/fluid/platform/enforce.h" namespace paddle { diff --git a/paddle/fluid/framework/executor_gc_helper.h b/paddle/fluid/framework/executor_gc_helper.h index a4c71c5304e..e44edc5aa1c 100644 --- a/paddle/fluid/framework/executor_gc_helper.h +++ b/paddle/fluid/framework/executor_gc_helper.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/framework/garbage_collector.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/scope.h" @@ -26,6 +27,10 @@ namespace paddle { namespace framework { // Result map: op -> variable names that can be deleted after op runs +class GarbageCollector; +class OperatorBase; +class Scope; + std::unordered_map> GetUnusedVars(const BlockDesc &block, const std::vector> &ops, diff --git a/paddle/fluid/framework/feed_fetch_method.cc b/paddle/fluid/framework/feed_fetch_method.cc index fd857f7735c..3bd85b2b24b 100644 --- a/paddle/fluid/framework/feed_fetch_method.cc +++ b/paddle/fluid/framework/feed_fetch_method.cc @@ -13,16 +13,17 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/framework/feed_fetch_method.h" + #include -#include + #include "glog/logging.h" -#include "paddle/fluid/framework/var_type.h" -#include "paddle/fluid/framework/variable.h" -#include "paddle/fluid/platform/place.h" namespace paddle { namespace framework { +class LoDTensor; +class Variable; + void SetFeedVariable(Scope* scope, const LoDTensor& input, const std::string& var_name, size_t index) { // If var_name Variable is not found in GlobalScope, a new variable will diff --git a/paddle/fluid/framework/feed_fetch_method.h b/paddle/fluid/framework/feed_fetch_method.h index 65c8b255ffb..a52ef517c8b 100644 --- a/paddle/fluid/framework/feed_fetch_method.h +++ b/paddle/fluid/framework/feed_fetch_method.h @@ -15,12 +15,16 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/scope.h" namespace paddle { namespace framework { +class LoDTensor; +class Scope; + void SetFeedVariable(Scope* scope, const LoDTensor& input, const std::string& var_name, size_t index); diff --git a/paddle/fluid/framework/fleet/fleet_wrapper.cc b/paddle/fluid/framework/fleet/fleet_wrapper.cc index 34fff042770..3c076805932 100644 --- a/paddle/fluid/framework/fleet/fleet_wrapper.cc +++ b/paddle/fluid/framework/fleet/fleet_wrapper.cc @@ -29,12 +29,6 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/fleet/fleet_wrapper.h" #include #include -#include "paddle/fluid/framework/channel.h" -#include "paddle/fluid/framework/data_feed.h" -#include "paddle/fluid/framework/io/fs.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/platform/timer.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/fleet/fleet_wrapper.h b/paddle/fluid/framework/fleet/fleet_wrapper.h index cc13a50160a..be87bdf1e75 100644 --- a/paddle/fluid/framework/fleet/fleet_wrapper.h +++ b/paddle/fluid/framework/fleet/fleet_wrapper.h @@ -35,6 +35,12 @@ limitations under the License. */ #include "paddle/fluid/framework/variable_helper.h" #include "paddle/fluid/platform/macros.h" // for DISABLE_COPY_AND_ASSIGN +namespace paddle { +namespace framework { +class Scope; +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/fleet/gloo_wrapper.cc b/paddle/fluid/framework/fleet/gloo_wrapper.cc index f195dde4084..f4b2d2d7d18 100644 --- a/paddle/fluid/framework/fleet/gloo_wrapper.cc +++ b/paddle/fluid/framework/fleet/gloo_wrapper.cc @@ -10,10 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/fleet/gloo_wrapper.h" -#include // NOLINT -#include #include "paddle/fluid/framework/io/fs.h" -#include "paddle/fluid/platform/errors.h" #include "paddle/fluid/string/string_helper.h" namespace gloo { diff --git a/paddle/fluid/framework/fleet/heter_wrapper.cc b/paddle/fluid/framework/fleet/heter_wrapper.cc index b70d5e5fc1a..7a27b6a9d7a 100644 --- a/paddle/fluid/framework/fleet/heter_wrapper.cc +++ b/paddle/fluid/framework/fleet/heter_wrapper.cc @@ -27,15 +27,6 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/framework/fleet/heter_wrapper.h" -#include -#include -#include "paddle/fluid/framework/channel.h" -#include "paddle/fluid/framework/data_feed.h" -#include "paddle/fluid/framework/device_worker.h" -#include "paddle/fluid/framework/io/fs.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/platform/timer.h" #ifdef PADDLE_WITH_PSLIB namespace paddle { @@ -122,14 +113,15 @@ void HeterWrapper::SerializeToReq(const std::string& varname, Scope* scope, if (platform::is_cpu_place(tensor->place())) { memcpy(data_ptr, tensor->data(), tensor->numel() * SizeOfType(tensor->type())); - } #ifdef PADDLE_WITH_CUDA - else { + } else { memory::Copy(platform::CPUPlace(), data_ptr, BOOST_GET_CONST(platform::CUDAPlace, tensor->place()), tensor->data(), tensor->numel() * SizeOfType(tensor->type()), nullptr); } +#else + } #endif } @@ -239,7 +231,7 @@ void HeterWrapper::CallRemoteXpu(std::shared_ptr task, request.set_cur_batch(task->cur_batch_); OnHeterRpcDone* done = new OnHeterRpcDone([this, task, worker](void* done) { - auto* closure = (OnHeterRpcDone*)done; + auto* closure = reinterpret_cast(done); if (closure->cntl.Failed()) { VLOG(0) << "call xpu fail: " << closure->cntl.ErrorText(); } else { diff --git a/paddle/fluid/framework/fleet/nccl_wrapper.cc b/paddle/fluid/framework/fleet/nccl_wrapper.cc index 33a91388fd8..ed92e2e9aad 100644 --- a/paddle/fluid/framework/fleet/nccl_wrapper.cc +++ b/paddle/fluid/framework/fleet/nccl_wrapper.cc @@ -13,9 +13,6 @@ // limitations under the License. 
#include "paddle/fluid/framework/fleet/nccl_wrapper.h" -#include -#include "paddle/fluid/framework/data_feed.h" -#include "paddle/fluid/framework/scope.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/fleet/nccl_wrapper.h b/paddle/fluid/framework/fleet/nccl_wrapper.h index a55921f1ac2..3725a225dbe 100644 --- a/paddle/fluid/framework/fleet/nccl_wrapper.h +++ b/paddle/fluid/framework/fleet/nccl_wrapper.h @@ -21,6 +21,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/variable_helper.h" @@ -29,6 +30,12 @@ limitations under the License. */ #endif #include "paddle/fluid/platform/macros.h" // for DISABLE_COPY_AND_ASSIGN +namespace paddle { +namespace framework { +class Scope; +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/garbage_collector.h b/paddle/fluid/framework/garbage_collector.h index 4f773965282..884d230816b 100644 --- a/paddle/fluid/framework/garbage_collector.h +++ b/paddle/fluid/framework/garbage_collector.h @@ -19,9 +19,16 @@ #include #include // NOLINT #include + #include "gflags/gflags.h" #include "paddle/fluid/platform/device_context.h" +namespace paddle { +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/generator.h b/paddle/fluid/framework/generator.h index a279c2e4e14..862e63c4c6a 100644 --- a/paddle/fluid/framework/generator.h +++ b/paddle/fluid/framework/generator.h @@ -16,7 +16,6 @@ limitations under the License. 
*/ #include #include - #include #include #include // temp for debug diff --git a/paddle/fluid/framework/hetercpu_worker.cc b/paddle/fluid/framework/hetercpu_worker.cc index 83838f4df67..747fd434ae7 100644 --- a/paddle/fluid/framework/hetercpu_worker.cc +++ b/paddle/fluid/framework/hetercpu_worker.cc @@ -12,13 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/device_worker.h" -#include "paddle/fluid/framework/device_worker_factory.h" -#include "paddle/fluid/framework/fleet/fleet_wrapper.h" -#include "paddle/fluid/framework/fleet/heter_wrapper.h" -#include "paddle/fluid/platform/cpu_helper.h" -#include "paddle/fluid/string/string_helper.h" - #ifdef PADDLE_WITH_PSLIB #if defined _WIN32 || defined __APPLE__ diff --git a/paddle/fluid/framework/heterxpu_trainer.cc b/paddle/fluid/framework/heterxpu_trainer.cc index 5ca1aa66319..fbed74800b4 100644 --- a/paddle/fluid/framework/heterxpu_trainer.cc +++ b/paddle/fluid/framework/heterxpu_trainer.cc @@ -12,16 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include -#include -#include -#include -#include "io/fs.h" -#include "paddle/fluid/framework/data_feed_factory.h" -#include "paddle/fluid/framework/data_set.h" -#include "paddle/fluid/framework/device_worker_factory.h" -#include "paddle/fluid/framework/fleet/fleet_wrapper.h" -#include "paddle/fluid/framework/trainer.h" #if (defined PADDLE_WITH_CUDA) && (defined PADDLE_WITH_PSLIB) #include "paddle/fluid/platform/cuda_device_guard.h" @@ -334,7 +324,7 @@ int HeterXpuTrainer::RunTask(const HeterRequest* request, std::shared_ptr context = object_pool_.Get(); if (!context->scope_) { - int num = rand() % places_.size(); + int num = rand_r() % places_.size(); context->place_num_ = num; auto place = places_[num]; context->scope_ = &(place_scopes_[num]->NewScope()); diff --git a/paddle/fluid/framework/inlined_vector.h b/paddle/fluid/framework/inlined_vector.h index 2a7f26b9f96..f8e937fa107 100644 --- a/paddle/fluid/framework/inlined_vector.h +++ b/paddle/fluid/framework/inlined_vector.h @@ -15,6 +15,7 @@ #pragma once #include #include + #include "paddle/fluid/platform/enforce.h" namespace paddle { diff --git a/paddle/fluid/framework/inlined_vector_test.cc b/paddle/fluid/framework/inlined_vector_test.cc index 003c0d7bbea..581e7d8934d 100644 --- a/paddle/fluid/framework/inlined_vector_test.cc +++ b/paddle/fluid/framework/inlined_vector_test.cc @@ -13,10 +13,10 @@ // limitations under the License. 
#include "paddle/fluid/framework/inlined_vector.h" + #include #include -#include -#include + #include "gtest/gtest.h" namespace paddle { diff --git a/paddle/fluid/framework/io/crypto/cipher_utils.cc b/paddle/fluid/framework/io/crypto/cipher_utils.cc index e0c653e0016..ee9f06b2f3e 100644 --- a/paddle/fluid/framework/io/crypto/cipher_utils.cc +++ b/paddle/fluid/framework/io/crypto/cipher_utils.cc @@ -15,8 +15,6 @@ #include "paddle/fluid/framework/io/crypto/cipher_utils.h" #include - -#include #include #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/io/crypto/cipher_utils.h b/paddle/fluid/framework/io/crypto/cipher_utils.h index 936f62f6ba6..52db03f530c 100644 --- a/paddle/fluid/framework/io/crypto/cipher_utils.h +++ b/paddle/fluid/framework/io/crypto/cipher_utils.h @@ -17,6 +17,7 @@ #include #include #include + namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/io/fs.h b/paddle/fluid/framework/io/fs.h index c88636e2674..bb6d720ca58 100644 --- a/paddle/fluid/framework/io/fs.h +++ b/paddle/fluid/framework/io/fs.h @@ -18,6 +18,7 @@ #include #include #include + #include "glog/logging.h" #include "paddle/fluid/framework/io/shell.h" #include "paddle/fluid/string/string_helper.h" diff --git a/paddle/fluid/framework/io/shell.h b/paddle/fluid/framework/io/shell.h index dc486275d6f..7db5cd7661c 100644 --- a/paddle/fluid/framework/io/shell.h +++ b/paddle/fluid/framework/io/shell.h @@ -32,6 +32,7 @@ #include #include #include + #include "paddle/fluid/platform/port.h" #include "paddle/fluid/string/string_helper.h" diff --git a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.h b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.h index 47ed9f0393f..48e3989a531 100644 --- a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.h +++ b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.h @@ -20,6 +20,8 @@ namespace paddle { namespace framework { namespace ir { +class Graph; + class AttentionLSTMFusePass : public FusePassBase 
{ protected: void ApplyImpl(ir::Graph* graph) const override; diff --git a/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc b/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc index fd8b55a6b7d..9c984a23e37 100644 --- a/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc @@ -13,19 +13,28 @@ // limitations under the License. #include "paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h" + #include -#include -#include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/operators/math/cpu_vec.h" #include "paddle/fluid/platform/enforce.h" +namespace paddle { +namespace framework { +class LoDTensor; +class Scope; +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace ir { +class Node; + #define GET_CONV_BN_NODES(pattern_name) \ /* OPERATORS */ \ GET_IR_NODE_FROM_SUBGRAPH(conv, conv, pattern_name); \ diff --git a/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h b/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h index d607020a47b..916384ec447 100644 --- a/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h +++ b/paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -26,6 +27,8 @@ namespace ir { /* * Fuse the Conv and ConvAffineChannel. 
*/ +class Graph; + class ConvAffineChannelFusePass : public FusePassBase { public: virtual ~ConvAffineChannelFusePass() {} diff --git a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc index fb787e08814..a915015bf55 100644 --- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc @@ -13,15 +13,22 @@ // limitations under the License. #include "paddle/fluid/framework/ir/conv_bn_fuse_pass.h" -#include -#include + #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/operators/math/cpu_vec.h" #include "paddle/fluid/platform/enforce.h" +namespace paddle { +namespace framework { +class LoDTensor; +class Scope; +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace ir { diff --git a/paddle/fluid/framework/ir/conv_bn_fuse_pass.h b/paddle/fluid/framework/ir/conv_bn_fuse_pass.h index 57a9f69ca15..342cd8dad5f 100644 --- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.h +++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -26,6 +27,8 @@ namespace ir { /* * Fuse the Conv and BatchNorm to a ConvBNMKLDNNOp. 
*/ +class Graph; + class ConvBNFusePass : public FusePassBase { public: virtual ~ConvBNFusePass() {} diff --git a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.h b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.h index ea9e465d8d7..e68f57d4ae9 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.h +++ b/paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.h @@ -20,6 +20,8 @@ namespace paddle { namespace framework { namespace ir { +class Graph; + class ConvElementwiseAdd2ActFusePass : public FusePassBase { public: virtual ~ConvElementwiseAdd2ActFusePass() {} diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc index c5fa47ec55f..93e6e13ff70 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.h" #include + #include "paddle/fluid/framework/ir/graph_viz_pass.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.h b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.h index 8b34c3551d8..933092c7db7 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.h +++ b/paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.h @@ -20,6 +20,8 @@ namespace paddle { namespace framework { namespace ir { +class Graph; + class ConvElementwiseAddActFusePass : public FusePassBase { public: virtual ~ConvElementwiseAddActFusePass() {} diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc b/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc index 38c0b773dde..e4396f227f7 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc @@ -14,6 +14,7 
@@ #include "paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.h" #include + #include "paddle/fluid/framework/ir/graph_viz_pass.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.h b/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.h index 66a562cdd19..7198a7488e0 100644 --- a/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.h +++ b/paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.h @@ -20,6 +20,8 @@ namespace paddle { namespace framework { namespace ir { +class Graph; + class ConvElementwiseAddFusePass : public FusePassBase { public: virtual ~ConvElementwiseAddFusePass() {} diff --git a/paddle/fluid/framework/ir/cudnn_placement_pass.h b/paddle/fluid/framework/ir/cudnn_placement_pass.h index 99dd3a175d3..8d84c2bf707 100644 --- a/paddle/fluid/framework/ir/cudnn_placement_pass.h +++ b/paddle/fluid/framework/ir/cudnn_placement_pass.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/ir/placement_pass_base.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/delete_quant_dequant_op_pass.cc b/paddle/fluid/framework/ir/delete_quant_dequant_op_pass.cc index 4dfbd5e00c1..886b080c662 100644 --- a/paddle/fluid/framework/ir/delete_quant_dequant_op_pass.cc +++ b/paddle/fluid/framework/ir/delete_quant_dequant_op_pass.cc @@ -12,10 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include - #include "paddle/fluid/framework/ir/delete_quant_dequant_op_pass.h" -#include "paddle/fluid/framework/ir/graph_viz_pass.h" +#include namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/delete_quant_dequant_op_pass.h b/paddle/fluid/framework/ir/delete_quant_dequant_op_pass.h index 938ada6453e..fea0498fdec 100644 --- a/paddle/fluid/framework/ir/delete_quant_dequant_op_pass.h +++ b/paddle/fluid/framework/ir/delete_quant_dequant_op_pass.h @@ -14,6 +14,7 @@ #pragma once #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -21,6 +22,8 @@ namespace paddle { namespace framework { namespace ir { +class Graph; + class DeleteQuantDequantOpPass : public FusePassBase { public: virtual ~DeleteQuantDequantOpPass() {} diff --git a/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc index 3f88a460d14..51861b402d5 100644 --- a/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc @@ -13,10 +13,10 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.h" -#include #include #include #include + #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.h b/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.h index 644eb1cf892..25049d7468b 100644 --- a/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.h +++ b/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.h @@ -17,10 +17,19 @@ #include #include #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" +namespace paddle { +namespace framework { +namespace ir { +class Graph; +} // namespace ir +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace ir { diff --git a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h index 65cb4439727..a9cde13758b 100644 --- a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h +++ b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h @@ -27,6 +27,8 @@ namespace ir { // Fusing of Embedding , FC and LSTM op // Just FC without bias +class Graph; + class EmbeddingFCLSTMFusePass : public FusePassBase { public: virtual ~EmbeddingFCLSTMFusePass() {} diff --git a/paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass.cc index 5c18a0d6c7f..bedb9689641 100644 --- a/paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass.cc @@ -13,9 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass.h" + #include #include #include + #include "paddle/fluid/framework/ir/graph_pattern_detector.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass.h b/paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass.h index ac4d0b39ee2..12e4c44b84e 100644 --- a/paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass.h +++ b/paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass.h @@ -20,6 +20,8 @@ namespace paddle { namespace framework { namespace ir { +class Graph; + class FCElementwiseLayerNormFusePass : public FusePassBase { public: virtual ~FCElementwiseLayerNormFusePass() {} diff --git a/paddle/fluid/framework/ir/fc_fuse_pass.cc b/paddle/fluid/framework/ir/fc_fuse_pass.cc index d60510a4074..0248aeedd0a 100644 --- a/paddle/fluid/framework/ir/fc_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_fuse_pass.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/fluid/framework/ir/fc_fuse_pass.h" -#include + #include -#include #include + #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/ir/fc_fuse_pass.h b/paddle/fluid/framework/ir/fc_fuse_pass.h index ef6636d109a..f564bbb1518 100644 --- a/paddle/fluid/framework/ir/fc_fuse_pass.h +++ b/paddle/fluid/framework/ir/fc_fuse_pass.h @@ -26,6 +26,8 @@ namespace ir { /* * Fuse the MUL and ELEMENTWISE_ADD to a FCOp. 
*/ +class Graph; + class FCFusePass : public FusePassBase { public: virtual ~FCFusePass() {} diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc index f5fea90ac2f..c4515bbc455 100644 --- a/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/framework/ir/fc_gru_fuse_pass.h" #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_version_registry.h" @@ -22,6 +23,8 @@ namespace paddle { namespace framework { namespace ir { +class Node; + static int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, bool with_fc_bias) { GraphPatternDetector gpd; diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass.h b/paddle/fluid/framework/ir/fc_gru_fuse_pass.h index e11cdac7ea9..73f00504d34 100644 --- a/paddle/fluid/framework/ir/fc_gru_fuse_pass.h +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -25,6 +26,8 @@ namespace ir { // The MulGRUFusePass and MulGRUFusePass will fuse to the same FusionGRU op. 
+class Graph; + class FCGRUFusePass : public FusePassBase { public: virtual ~FCGRUFusePass() {} diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc index a3c57e14e1a..2b451da7bfa 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/framework/ir/fc_lstm_fuse_pass.h" #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_version_registry.h" @@ -22,6 +23,8 @@ namespace paddle { namespace framework { namespace ir { +class Node; + int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope, bool with_fc_bias) { GraphPatternDetector gpd; diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.h b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.h index 5dea7c91a86..d37f53b15f0 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.h +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.h @@ -27,6 +27,8 @@ namespace ir { // The MulLstmFusePass and MulLstmFusePass will fuse to the same FusionLstm op. 
// Just FC without bias +class Graph; + class FCLstmFusePass : public FusePassBase { public: virtual ~FCLstmFusePass() {} diff --git a/paddle/fluid/framework/ir/fuse_bn_act_pass.cc b/paddle/fluid/framework/ir/fuse_bn_act_pass.cc index 54c05046a2c..db3c711201d 100644 --- a/paddle/fluid/framework/ir/fuse_bn_act_pass.cc +++ b/paddle/fluid/framework/ir/fuse_bn_act_pass.cc @@ -15,12 +15,17 @@ #include "paddle/fluid/framework/ir/fuse_bn_act_pass.h" #include #include -#include -#include -#include #include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace framework { +namespace ir { +class Node; +} // namespace ir +} // namespace framework +} // namespace paddle #ifdef PADDLE_WITH_CUDA #include "paddle/fluid/platform/cudnn_helper.h" #endif diff --git a/paddle/fluid/framework/ir/fuse_bn_act_pass.h b/paddle/fluid/framework/ir/fuse_bn_act_pass.h index 427ff03a803..7e5f046ecaa 100644 --- a/paddle/fluid/framework/ir/fuse_bn_act_pass.h +++ b/paddle/fluid/framework/ir/fuse_bn_act_pass.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -30,6 +31,9 @@ namespace ir { /* * Fuse the BatchNorm and activation. 
*/ +class Graph; +class Node; + class FuseBatchNormActPass : public FusePassBase { public: virtual ~FuseBatchNormActPass() {} diff --git a/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.h b/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.h index dc73f1fda03..d9356b7bd72 100644 --- a/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.h +++ b/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.h @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -29,6 +30,9 @@ namespace ir { /* * Fuse the ElewiseAdd and activation */ +class Graph; +class Node; + class FuseElewiseAddActPass : public FusePassBase { public: virtual ~FuseElewiseAddActPass() {} diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc index c284c1f4587..0094b674c2a 100644 --- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc +++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc @@ -11,19 +11,22 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#include +#include #include -#include -#include -#include +#include "glog/logging.h" #include "paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.h" -#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/ir/graph.h" +#include "paddle/fluid/framework/ir/pass.h" +#include "paddle/fluid/framework/op_desc.h" +#include "paddle/fluid/platform/enforce.h" namespace paddle { namespace framework { namespace ir { +class Node; + class FuseAdamOpPass : public FuseOptimizerOpPass { private: const std::string GetOpType() const { return "adam"; } diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc index 43ec8bff5ed..f87d31cbc40 100644 --- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc +++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc @@ -12,18 +12,21 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include #include -#include -#include +#include "glog/logging.h" #include "paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.h" -#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/ir/graph.h" +#include "paddle/fluid/framework/ir/pass.h" +#include "paddle/fluid/framework/op_desc.h" +#include "paddle/fluid/platform/enforce.h" namespace paddle { namespace framework { namespace ir { +class Node; + class FuseMomentumOpPass : public FuseOptimizerOpPass { private: virtual const std::string GetOpType() const { return "momentum"; } diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.h b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.h index 0b5bf8a3a4b..5b7e1b7d384 100644 --- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.h +++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.h @@ -19,14 +19,25 @@ #include #include #include + #include "paddle/fluid/framework/details/build_strategy.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/ir/graph.h" +namespace paddle { +namespace framework { +class BlockDesc; +class VarDesc; +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace ir { +class Graph; +class Node; + constexpr char kGrad[] = "Grad"; constexpr char kParam[] = "Param"; constexpr char kLearningRate[] = "LearningRate"; diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc index 70d4d2b8652..720d252c9a6 100644 --- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc +++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc @@ -11,17 +11,21 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. -#include #include -#include -#include +#include "glog/logging.h" #include "paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.h" -#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/ir/graph.h" +#include "paddle/fluid/framework/ir/pass.h" +#include "paddle/fluid/framework/op_desc.h" +#include "paddle/fluid/platform/enforce.h" + namespace paddle { namespace framework { namespace ir { +class Node; + class FuseSgdOpPass : public FuseOptimizerOpPass { private: virtual const std::string GetOpType() const { return "sgd"; } diff --git a/paddle/fluid/framework/ir/fuse_pass_base.cc b/paddle/fluid/framework/ir/fuse_pass_base.cc index e6fb1302e27..f3db4f02b1c 100644 --- a/paddle/fluid/framework/ir/fuse_pass_base.cc +++ b/paddle/fluid/framework/ir/fuse_pass_base.cc @@ -15,10 +15,18 @@ #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include +namespace paddle { +namespace framework { +class Scope; +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace ir { +class Graph; + void FusePassBase::Init(const std::string& repr, Graph* graph) const { repr_ = repr; graph_ = graph; diff --git a/paddle/fluid/framework/ir/fuse_pass_base.h b/paddle/fluid/framework/ir/fuse_pass_base.h index 3a1022bbcbd..ce7635bb35c 100644 --- a/paddle/fluid/framework/ir/fuse_pass_base.h +++ b/paddle/fluid/framework/ir/fuse_pass_base.h @@ -15,14 +15,24 @@ #pragma once #include + #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/scope.h" +namespace paddle { +namespace framework { +class Scope; +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace ir { +class Graph; +class Node; + static const char kParamScopeAttr[] = "__param_scope__"; static const char kFuseStatisAttr[] = 
"__fuse_statis__"; // When we use trt or other third_party lib, the parameters are managed by diff --git a/paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.h b/paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.h index d37c153dd2a..0b1dfaa6928 100644 --- a/paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.h +++ b/paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.h @@ -15,6 +15,7 @@ #include #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -27,6 +28,8 @@ namespace ir { /* * Fuse the relu and depthwise conv */ +class Graph; + class FuseReluDepthwiseConvPass : public FusePassBase { public: virtual ~FuseReluDepthwiseConvPass() {} diff --git a/paddle/fluid/framework/ir/fusion_group/code_generator.h b/paddle/fluid/framework/ir/fusion_group/code_generator.h index 21773f239b9..15d21cf6829 100644 --- a/paddle/fluid/framework/ir/fusion_group/code_generator.h +++ b/paddle/fluid/framework/ir/fusion_group/code_generator.h @@ -19,6 +19,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/ir/fusion_group/code_generator_helper.h" #include "paddle/fluid/framework/ir/fusion_group/subgraph.h" @@ -27,6 +28,8 @@ namespace framework { namespace ir { namespace fusion_group { +class SubGraph; + class CodeGenerator { public: CodeGenerator(); diff --git a/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc b/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc index ebc89b14c26..2a7a0748cf0 100644 --- a/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc +++ b/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc @@ -12,18 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/framework/ir/fusion_group/code_generator.h" #include #include #include #include + +#include "paddle/fluid/framework/ir/fusion_group/code_generator.h" #include "paddle/fluid/framework/ir/fusion_group/operation.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/operators/math.h" #include "paddle/fluid/platform/device_code.h" #include "paddle/fluid/platform/float16.h" -#include "paddle/fluid/platform/init.h" + +namespace paddle { +namespace framework { +class LoDTensor; +} // namespace framework +} // namespace paddle #ifdef PADDLE_WITH_CUDA diff --git a/paddle/fluid/framework/ir/fusion_group/elementwise_group_detector.h b/paddle/fluid/framework/ir/fusion_group/elementwise_group_detector.h index 0861c2f7e96..96b38f65013 100644 --- a/paddle/fluid/framework/ir/fusion_group/elementwise_group_detector.h +++ b/paddle/fluid/framework/ir/fusion_group/elementwise_group_detector.h @@ -15,9 +15,18 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/node.h" +namespace paddle { +namespace framework { +namespace ir { +class Node; +} // namespace ir +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace ir { diff --git a/paddle/fluid/framework/ir/fusion_group/fusion_group_pass.h b/paddle/fluid/framework/ir/fusion_group/fusion_group_pass.h index 3438783c180..5ca785846a5 100644 --- a/paddle/fluid/framework/ir/fusion_group/fusion_group_pass.h +++ b/paddle/fluid/framework/ir/fusion_group/fusion_group_pass.h @@ -16,6 +16,7 @@ limitations under the License. 
*/ #include #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/fusion_group/subgraph.h" @@ -23,6 +24,11 @@ namespace paddle { namespace framework { namespace ir { +class Graph; +namespace fusion_group { +class SubGraph; +} // namespace fusion_group + class FusionGroupPass : public FusePassBase { protected: void ApplyImpl(Graph* graph) const override; diff --git a/paddle/fluid/framework/ir/fusion_group/operation.h b/paddle/fluid/framework/ir/fusion_group/operation.h index 74abbdaad0b..d99fe737504 100644 --- a/paddle/fluid/framework/ir/fusion_group/operation.h +++ b/paddle/fluid/framework/ir/fusion_group/operation.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/platform/enforce.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/graph.h b/paddle/fluid/framework/ir/graph.h index 137ab7a56d7..593ac214e56 100644 --- a/paddle/fluid/framework/ir/graph.h +++ b/paddle/fluid/framework/ir/graph.h @@ -25,6 +25,13 @@ limitations under the License. */ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/variant.h" +namespace paddle { +namespace framework { +class OpDesc; +class VarDesc; +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/graph_helper.h b/paddle/fluid/framework/ir/graph_helper.h index 074ad320fb1..0c43febca70 100644 --- a/paddle/fluid/framework/ir/graph_helper.h +++ b/paddle/fluid/framework/ir/graph_helper.h @@ -28,6 +28,8 @@ namespace framework { namespace ir { // Compare nodes via node id. 
+class Graph; + struct NodeComp { bool operator()(ir::Node *const &node1, ir::Node *const &node2) const { return node1->id() < node2->id(); diff --git a/paddle/fluid/framework/ir/graph_helper_test.cc b/paddle/fluid/framework/ir/graph_helper_test.cc index d8973d5aeda..0a2dcfed000 100644 --- a/paddle/fluid/framework/ir/graph_helper_test.cc +++ b/paddle/fluid/framework/ir/graph_helper_test.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/ir/graph.h" -#include #include "gtest/gtest.h" #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/program_desc.h" diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index 053c1fe832b..7116b8a2a6f 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -27,11 +27,21 @@ #include #include #include + #include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/node.h" #include "paddle/fluid/inference/analysis/dot.h" +namespace paddle { +namespace framework { +namespace ir { +class Graph; +class Node; +} // namespace ir +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace ir { diff --git a/paddle/fluid/framework/ir/graph_pattern_detector_tester.cc b/paddle/fluid/framework/ir/graph_pattern_detector_tester.cc index 6c466fb21fb..5ac5a5d9839 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector_tester.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector_tester.cc @@ -12,14 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/graph_pattern_detector.h" - #include +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" + namespace paddle { namespace framework { namespace ir { +class Node; + void BuildGraph(Graph* g) { ir::Node* o1 = g->CreateEmptyNode("op1", Node::Type::kOperation); ir::Node* o2 = g->CreateEmptyNode("op2", Node::Type::kOperation); diff --git a/paddle/fluid/framework/ir/graph_to_program_pass.h b/paddle/fluid/framework/ir/graph_to_program_pass.h index 52c8f4e0fca..6b17c0076f6 100644 --- a/paddle/fluid/framework/ir/graph_to_program_pass.h +++ b/paddle/fluid/framework/ir/graph_to_program_pass.h @@ -20,6 +20,8 @@ namespace paddle { namespace framework { namespace ir { +class Graph; + const char kGraphToProgramVarsToRemove[] = "__graph_to_program_vars_to_remove__"; const char kGraphToProgramSortKind[] = "__graph_to_program_sort_kind__"; diff --git a/paddle/fluid/framework/ir/graph_to_program_pass_test.cc b/paddle/fluid/framework/ir/graph_to_program_pass_test.cc index 5ee6b8a5f1e..80d7839d700 100644 --- a/paddle/fluid/framework/ir/graph_to_program_pass_test.cc +++ b/paddle/fluid/framework/ir/graph_to_program_pass_test.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/ir/graph_to_program_pass.h" - #include #include #include @@ -25,6 +24,8 @@ namespace paddle { namespace framework { namespace ir { +class Node; + void BuildNoCircleGraph(Graph* g) { OpDesc op1; op1.SetType("op1"); diff --git a/paddle/fluid/framework/ir/graph_traits.cc b/paddle/fluid/framework/ir/graph_traits.cc index 4b403c46260..3fa84554d99 100644 --- a/paddle/fluid/framework/ir/graph_traits.cc +++ b/paddle/fluid/framework/ir/graph_traits.cc @@ -13,10 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/graph_traits.h" - #include -#include -#include namespace paddle { namespace framework { @@ -25,6 +22,8 @@ namespace ir { // // NodesDFSIterator // +class Node; + NodesDFSIterator::NodesDFSIterator(const std::vector &source) { for (auto *x : source) stack_.push(x); } diff --git a/paddle/fluid/framework/ir/graph_traits.h b/paddle/fluid/framework/ir/graph_traits.h index bb4212bcd33..a54cc61a63f 100644 --- a/paddle/fluid/framework/ir/graph_traits.h +++ b/paddle/fluid/framework/ir/graph_traits.h @@ -26,6 +26,9 @@ namespace paddle { namespace framework { namespace ir { +class Graph; +class Node; + template class iterator_range { IteratorT begin_, end_; diff --git a/paddle/fluid/framework/ir/graph_viz_pass.h b/paddle/fluid/framework/ir/graph_viz_pass.h index 7091aa6a95b..118c1bc6f3c 100644 --- a/paddle/fluid/framework/ir/graph_viz_pass.h +++ b/paddle/fluid/framework/ir/graph_viz_pass.h @@ -28,6 +28,8 @@ namespace paddle { namespace framework { namespace ir { +class Graph; + const char kGraphvizMarkedNodeAttr[] = "__graphviz__marked_node__"; class GraphVizPass : public Pass { diff --git a/paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc b/paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc index c8dfa02f469..08d09fce5de 100644 --- a/paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc +++ b/paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc @@ -20,6 +20,8 @@ namespace paddle { namespace framework { namespace ir { +class Graph; + void IdentityScaleOpCleanPass::ApplyImpl(ir::Graph* graph) const { FusePassBase::Init("identity_scale_op_clean", graph); diff --git a/paddle/fluid/framework/ir/identity_scale_op_clean_pass.h b/paddle/fluid/framework/ir/identity_scale_op_clean_pass.h index d66b411257e..7e3d4e19fa8 100644 --- a/paddle/fluid/framework/ir/identity_scale_op_clean_pass.h +++ b/paddle/fluid/framework/ir/identity_scale_op_clean_pass.h @@ -20,6 +20,8 @@ namespace paddle { namespace framework { namespace ir { 
+class Graph; + class IdentityScaleOpCleanPass : public FusePassBase { protected: void ApplyImpl(ir::Graph* graph) const override; diff --git a/paddle/fluid/framework/ir/is_test_pass.cc b/paddle/fluid/framework/ir/is_test_pass.cc index bf6fe999c1e..9c1640efcd8 100644 --- a/paddle/fluid/framework/ir/is_test_pass.cc +++ b/paddle/fluid/framework/ir/is_test_pass.cc @@ -20,6 +20,8 @@ namespace paddle { namespace framework { namespace ir { +class Graph; + void IsTestPass::ApplyImpl(ir::Graph* graph) const { VLOG(3) << "Sets is_test attrbiute to true and if it is missing, inserts it " "for activations and pooling."; diff --git a/paddle/fluid/framework/ir/is_test_pass.h b/paddle/fluid/framework/ir/is_test_pass.h index 80cedbf9f85..abf48480d71 100644 --- a/paddle/fluid/framework/ir/is_test_pass.h +++ b/paddle/fluid/framework/ir/is_test_pass.h @@ -20,6 +20,8 @@ namespace paddle { namespace framework { namespace ir { +class Graph; + class IsTestPass : public Pass { protected: void ApplyImpl(ir::Graph* graph) const override; diff --git a/paddle/fluid/framework/ir/lock_free_optimize_pass.h b/paddle/fluid/framework/ir/lock_free_optimize_pass.h index f38f48fcd92..26ec61fd36e 100644 --- a/paddle/fluid/framework/ir/lock_free_optimize_pass.h +++ b/paddle/fluid/framework/ir/lock_free_optimize_pass.h @@ -27,6 +27,7 @@ namespace framework { namespace ir { class Node; +class Graph; /* * Remove the sum op of all gradients of the backward op. diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_cross_op_memory_reuse_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_cross_op_memory_reuse_pass.cc index b1afa47910f..b12b84d4a49 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_cross_op_memory_reuse_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_cross_op_memory_reuse_pass.cc @@ -12,24 +12,23 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include #include -#include -#include -#include + +#include "glog/logging.h" #include "paddle/fluid/framework/details/computation_op_handle.h" -#include "paddle/fluid/framework/details/multi_devices_helper.h" -#include "paddle/fluid/framework/details/share_tensor_buffer_op_handle.h" -#include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h" +#include "paddle/fluid/framework/details/op_handle_base.h" +#include "paddle/fluid/framework/details/var_handle.h" #include "paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.h" #include "paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.h" -#include "paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.h" #include "paddle/fluid/framework/ir/pass.h" +#include "paddle/fluid/platform/enforce.h" namespace paddle { namespace framework { namespace ir { +class Graph; + using OpHandleBase = details::OpHandleBase; using ComputationOpHandle = details::ComputationOpHandle; using VarHandle = details::VarHandle; diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc index ce7f27d2755..0cdde5c757a 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include + #include #include #include @@ -22,13 +23,15 @@ #include "paddle/fluid/framework/details/share_tensor_buffer_op_handle.h" #include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h" #include "paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.h" -#include "paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.h" #include "paddle/fluid/framework/ir/pass.h" +#include "paddle/fluid/platform/enforce.h" namespace paddle { namespace framework { namespace ir { +class Graph; + class BufferSharedInplaceOpPass : public MemoryReusePass { protected: std::string ReuseType() const override { return "inplace"; } diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc index 3e3b9864a7b..72e29dfe156 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc @@ -16,10 +16,15 @@ #include #include -#include -#include -#include -#include + +namespace paddle { +namespace framework { +namespace details { +class ComputationOpHandle; +class ShareTensorBufferOpHandle; +} // namespace details +} // namespace framework +} // namespace paddle namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.h b/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.h index 1c0c6ae6020..4a77d116f1e 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.h +++ b/paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.h @@ -28,6 +28,17 @@ #include "paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.h" #include "paddle/fluid/framework/ir/pass.h" +namespace paddle { +namespace framework { +class VarDesc; +namespace details { +class ComputationOpHandle; +class ShareTensorBufferOpHandle; +struct VarHandle; +} // namespace 
details +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace ir { @@ -71,6 +82,8 @@ namespace ir { * a pass to clean all ShareTensorBufferOpHandles and move sharing to * ComputationOpHandle::Run() in the future. */ +class Graph; + class MemoryReusePass : public Pass { protected: void ApplyImpl(Graph *graph) const final; diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.cc b/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.cc index 11c2508afb5..7de62d6e482 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.cc @@ -13,8 +13,14 @@ // limitations under the License. #include "paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.h" -#include -#include + +namespace paddle { +namespace framework { +namespace details { +class OpHandleBase; +} // namespace details +} // namespace framework +} // namespace paddle namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.h b/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.h index 5fb2caedba8..d6f286afc55 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.h +++ b/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.h @@ -18,8 +18,17 @@ #include #include #include + #include "paddle/fluid/framework/details/op_handle_base.h" +namespace paddle { +namespace framework { +namespace details { +class OpHandleBase; +} // namespace details +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace ir { diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.cc b/paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.cc index ed87f73adf1..e85be0272de 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.cc +++ 
b/paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.cc @@ -13,8 +13,6 @@ // limitations under the License. #include "paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.h" -#include "paddle/fluid/framework/details/var_handle.h" -#include "paddle/fluid/framework/var_desc.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.h b/paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.h index 0e8f4e78d22..d00e4f53022 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.h +++ b/paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.h @@ -30,6 +30,9 @@ namespace paddle { namespace framework { class VarDesc; +namespace details { +struct VarHandle; +} // namespace details namespace ir { diff --git a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc index 45ff275d530..a5beec87c39 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc @@ -13,14 +13,21 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h" -#include #include #include "paddle/fluid/platform/enforce.h" +namespace paddle { +namespace framework { +class OpDesc; +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace ir { +class Graph; + void ConvActivationFusePass::ApplyImpl(ir::Graph* graph) const { PADDLE_ENFORCE_NOT_NULL( graph, platform::errors::InvalidArgument("Graph cannot be nullptr.")); diff --git a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h index ac15fc04512..be6b1e07c02 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -26,6 +27,8 @@ namespace ir { /* * Fuse Conv and Activation base class. 
*/ +class Graph; + class ConvActivationFusePass : public FusePassBase { public: virtual ~ConvActivationFusePass() {} diff --git a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc index f7a8e3e3f6c..63524294b68 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc @@ -14,7 +14,6 @@ #include "paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h" #include -#include #include #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h index 833fbc748eb..9a83310ebfb 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h @@ -13,16 +13,20 @@ // limitations under the License. #pragma once #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/pass.h" + namespace paddle { namespace framework { namespace ir { /* * Fuse the Conv and Elementwise_add to a ConvBiasOp. 
*/ +class Graph; + class ConvBiasFusePass : public FusePassBase { public: virtual ~ConvBiasFusePass() {} diff --git a/paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.cc index af64cb22054..5fadd9607e9 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.cc @@ -20,6 +20,8 @@ namespace paddle { namespace framework { namespace ir { +class Graph; + void ConvConcatReLUFusePass::FindConcatWithConvs( ir::Graph* graph, std::unordered_map* concat_with_convs_counter) const { diff --git a/paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.h index 91ff0760f04..f1faa84f3d5 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.h @@ -16,6 +16,7 @@ #include #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -30,6 +31,8 @@ namespace ir { * to a: * (multi ConvReLU) -> Concat -> next_op. 
*/ +class Graph; + class ConvConcatReLUFusePass : public FusePassBase { public: virtual ~ConvConcatReLUFusePass() {} diff --git a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.h index b95aec34d30..2ba4c80678f 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.h @@ -28,6 +28,13 @@ namespace paddle { namespace framework { namespace ir { +class Graph; +class GraphPatternDetector; +class Node; +namespace patterns { +struct Conv; +} // namespace patterns + using graph_ptr = ir::Graph*; using GraphWithStats = std::pair; diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc index aa0979b4be6..0254b5e7573 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc @@ -13,12 +13,11 @@ // limitations under the License. #include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h" -#include + #include #include #include -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/platform/errors.h" + #include "paddle/fluid/platform/mkldnn_helper.h" #include "paddle/fluid/string/pretty_log.h" diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h index 21219e7dca8..bd87b31b781 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -31,6 +32,9 @@ namespace ir { * bool denotes whether quantization of the variable should be done to unsigned * type. 
*/ +class Graph; +class Node; + using VarQuantScale = std::unordered_map>; diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc index bc268a83478..2146d833ddf 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.cc @@ -13,13 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.h" -#include #include namespace paddle { namespace framework { namespace ir { +class Graph; + void CPUQuantizePlacementPass::ApplyImpl(ir::Graph* graph) const { VLOG(3) << "Marks operators which are to be quantized."; const auto& excluded_ids_list = diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.h b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.h index f3229e59d6f..474fa63f60e 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -26,6 +27,8 @@ namespace ir { /* * Specifies which operators should be quantized. */ +class Graph; + class CPUQuantizePlacementPass : public FusePassBase { protected: void ApplyImpl(ir::Graph* graph) const override; diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc index bc24c10d9d0..54ab244a99b 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc @@ -14,9 +14,10 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h" -#include + #include #include + #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/string/pretty_log.h" @@ -24,6 +25,8 @@ namespace paddle { namespace framework { namespace ir { +class Graph; + using string::PrettyLogDetail; void CPUQuantizeSquashPass::FindNodesToKeep( diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h index 98a518e4e53..d1465f9da5c 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.h @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -29,6 +30,8 @@ namespace ir { /* * Squash dequantize->quantize pair pattern into requantize op */ +class Graph; + class CPUQuantizeSquashPass : public FusePassBase { public: virtual ~CPUQuantizeSquashPass() {} diff --git a/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.cc index df5ba3314e6..b2c0afdc754 100644 --- a/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.h" + #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" @@ -20,6 +21,8 @@ namespace paddle { namespace framework { namespace ir { +class Graph; + #define GET_NODE(id, pattern) \ PADDLE_ENFORCE_NE(subgraph.count(pattern.RetrieveNode(#id)), 0, \ platform::errors::InvalidArgument( \ diff --git a/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.h b/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.h index ca314afde57..0f4ecc71ad7 100644 --- a/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.h @@ -20,6 +20,8 @@ namespace paddle { namespace framework { namespace ir { +class Graph; + class DepthwiseConvMKLDNNPass : public FusePassBase { public: virtual ~DepthwiseConvMKLDNNPass() {} diff --git a/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc index 95afc548376..6efa9f6b749 100644 --- a/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.cc @@ -13,18 +13,21 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.h" -#include -#include -#include -#include -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/lod_tensor.h" + #include "paddle/fluid/platform/enforce.h" +namespace paddle { +namespace framework { +class OpDesc; +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace ir { +class Graph; + void FCMKLDNNPass::ApplyImpl(ir::Graph* graph) const { PADDLE_ENFORCE_NOT_NULL(graph, platform::errors::InvalidArgument( diff --git a/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.h b/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.h index 97c6b242989..df02250394a 100644 --- a/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/fc_mkldnn_pass.h @@ -13,6 +13,7 @@ // limitations under the License. #pragma once #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -25,6 +26,8 @@ namespace ir { /* * Transpose weights of FC to comply with MKL-DNN interface */ +class Graph; + class FCMKLDNNPass : public FusePassBase { public: virtual ~FCMKLDNNPass() {} diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_fuse_pass.h index 77e30b35346..ef469bac40c 100644 --- a/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_fuse_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_fuse_pass.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -22,6 +23,8 @@ namespace paddle { namespace framework { namespace ir { +class Graph; + class MatmulTransposeReshapeMKLDNNPass : public FusePassBase { public: virtual ~MatmulTransposeReshapeMKLDNNPass() {} @@ -30,6 
+33,6 @@ class MatmulTransposeReshapeMKLDNNPass : public FusePassBase { void ApplyImpl(Graph* graph) const override; const std::string name_scope_{"matmul_transpose_reshape_fuse"}; }; -} +} // namespace ir } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h b/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h index de699430189..ca56a8900ca 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/ir/placement_pass_base.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h index eab9f095623..7a53b3c4984 100644 --- a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -25,6 +26,8 @@ namespace ir { /* * Fuse Reshape->Transpose->MatMul when MatMul uses mkldnn. */ +class Graph; + class ReshapeTransposeMatmulMkldnnFusePass : public FusePassBase { public: virtual ~ReshapeTransposeMatmulMkldnnFusePass() {} diff --git a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc index 6c87e437caa..790821e3fa4 100644 --- a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h" + #include #include + #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/string/pretty_log.h" @@ -22,6 +24,8 @@ namespace paddle { namespace framework { namespace ir { +class Graph; + using string::PrettyLogDetail; void ScaleMatmulFusePass::ApplyImpl(ir::Graph* graph) const { diff --git a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h index fe97b9681ce..32ff78d9a73 100644 --- a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h @@ -20,6 +20,8 @@ namespace paddle { namespace framework { namespace ir { +class Graph; + class ScaleMatmulFusePass : public FusePassBase { public: virtual ~ScaleMatmulFusePass() {} diff --git a/paddle/fluid/framework/ir/multi_batch_merge_pass.h b/paddle/fluid/framework/ir/multi_batch_merge_pass.h index a89616683d9..ae2e68c6003 100644 --- a/paddle/fluid/framework/ir/multi_batch_merge_pass.h +++ b/paddle/fluid/framework/ir/multi_batch_merge_pass.h @@ -31,6 +31,8 @@ namespace ir { // sync training, we can simulate even large batch size as if we have more // GPUs. 
+class Graph; + class BatchMergePass : public Pass { public: virtual ~BatchMergePass() {} diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h index bb6c8079074..bb3586ba804 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h @@ -20,17 +20,32 @@ #include #include #include + #include "paddle/fluid/framework/details/build_strategy.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/ir/graph.h" +namespace paddle { +namespace framework { +namespace details { +class OpHandleBase; +struct VarHandle; +} // namespace details +namespace ir { +class Graph; +} // namespace ir +} // namespace framework +} // namespace paddle + namespace paddle { namespace platform { class NCCLContextMap; +class NCCLCommunicator; } namespace framework { class Scope; + namespace ir { constexpr char kLossVarName[] = "loss_var_name"; diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/sequential_execution_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/sequential_execution_pass.cc index bcbd1e066cc..78e90f82bfb 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/sequential_execution_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/sequential_execution_pass.cc @@ -12,13 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include -#include -#include -#include #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/pass.h" -#include "paddle/fluid/framework/op_proto_maker.h" + +namespace paddle { +namespace framework { +class OpDesc; +} // namespace framework +} // namespace paddle namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/set_reader_device_info_utils.h b/paddle/fluid/framework/ir/multi_devices_graph_pass/set_reader_device_info_utils.h index 0038790cae2..d37b21f0584 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/set_reader_device_info_utils.h +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/set_reader_device_info_utils.h @@ -17,10 +17,18 @@ #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/scope.h" +namespace paddle { +namespace framework { +class Scope; +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace ir { +class Graph; + void InitReaderQueueDeviceCount(Graph *graph, const Scope &scope, size_t dev_cnt); diff --git a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc index 9d2b4ebaf8c..d1fbc8396ba 100644 --- a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc @@ -13,11 +13,11 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/multihead_matmul_fuse_pass.h" -#include + #include #include #include -#include "paddle/fluid/framework/ddim.h" + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/errors.h" diff --git a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.h b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.h index 0afa00fc62a..f5327dc7108 100644 --- a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.h +++ b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.h @@ -16,10 +16,19 @@ #include #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" +namespace paddle { +namespace framework { +namespace ir { +class Graph; +} // namespace ir +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace ir { diff --git a/paddle/fluid/framework/ir/node.cc b/paddle/fluid/framework/ir/node.cc index 45d81b93739..7143c9a7a3e 100644 --- a/paddle/fluid/framework/ir/node.cc +++ b/paddle/fluid/framework/ir/node.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/ir/node.h" -#include "paddle/fluid/framework/op_info.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/node.h b/paddle/fluid/framework/ir/node.h index 87e7e64acb7..d0db3bd36e1 100644 --- a/paddle/fluid/framework/ir/node.h +++ b/paddle/fluid/framework/ir/node.h @@ -19,10 +19,18 @@ limitations under the License. 
*/ #include #include #include + #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/var_desc.h" #include "paddle/fluid/platform/macros.h" +namespace paddle { +namespace framework { +class OpDesc; +class VarDesc; +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { namespace ir { diff --git a/paddle/fluid/framework/ir/pass.cc b/paddle/fluid/framework/ir/pass.cc index a5ca13f1ce2..0e5f5867f47 100644 --- a/paddle/fluid/framework/ir/pass.cc +++ b/paddle/fluid/framework/ir/pass.cc @@ -14,11 +14,15 @@ limitations under the License. */ #include "paddle/fluid/framework/ir/pass.h" -#include -#include - #include "paddle/fluid/framework/ir/graph_helper.h" -#include "paddle/fluid/platform/device_context.h" + +namespace paddle { +namespace framework { +namespace ir { +class Graph; +} // namespace ir +} // namespace framework +} // namespace paddle #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif diff --git a/paddle/fluid/framework/ir/pass.h b/paddle/fluid/framework/ir/pass.h index 0f5ef551f04..668dc74eab2 100644 --- a/paddle/fluid/framework/ir/pass.h +++ b/paddle/fluid/framework/ir/pass.h @@ -21,6 +21,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/node.h" #include "paddle/fluid/framework/program_desc.h" @@ -29,6 +30,7 @@ limitations under the License. 
*/ namespace paddle { namespace framework { namespace ir { +class Graph; template struct PassRegistrar; diff --git a/paddle/fluid/framework/ir/pass_builder.cc b/paddle/fluid/framework/ir/pass_builder.cc index 6457bd230c5..4e99271a2ec 100644 --- a/paddle/fluid/framework/ir/pass_builder.cc +++ b/paddle/fluid/framework/ir/pass_builder.cc @@ -20,6 +20,8 @@ namespace paddle { namespace framework { namespace ir { +class Pass; + std::shared_ptr PassBuilder::AppendPass(const std::string& pass_type) { VLOG(1) << "Append " << pass_type; auto pass = ir::PassRegistry::Instance().Get(pass_type); diff --git a/paddle/fluid/framework/ir/pass_builder.h b/paddle/fluid/framework/ir/pass_builder.h index 733d3a3ad1a..0e68767db3f 100644 --- a/paddle/fluid/framework/ir/pass_builder.h +++ b/paddle/fluid/framework/ir/pass_builder.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once +#include #include #include #include "paddle/fluid/framework/ir/pass.h" @@ -22,6 +23,8 @@ namespace paddle { namespace framework { namespace ir { +class Pass; + class PassBuilder { public: PassBuilder() {} diff --git a/paddle/fluid/framework/ir/pass_test.cc b/paddle/fluid/framework/ir/pass_test.cc index 0c5286b3f77..65b9c427869 100644 --- a/paddle/fluid/framework/ir/pass_test.cc +++ b/paddle/fluid/framework/ir/pass_test.cc @@ -13,15 +13,17 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/framework/ir/pass.h" -#include + #include -#include + #include "gtest/gtest.h" -#include "paddle/fluid/framework/ir/graph.h" namespace paddle { namespace framework { namespace ir { +class Graph; +class Node; + void BuildCircleGraph(Graph* g) { ir::Node* o1 = g->CreateEmptyNode("op1", Node::Type::kOperation); ir::Node* o2 = g->CreateEmptyNode("op2", Node::Type::kOperation); diff --git a/paddle/fluid/framework/ir/placement_pass_base.h b/paddle/fluid/framework/ir/placement_pass_base.h index 5cdd7963e54..ef1a920db3f 100644 --- a/paddle/fluid/framework/ir/placement_pass_base.h +++ b/paddle/fluid/framework/ir/placement_pass_base.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/ir/pass.h" namespace paddle { @@ -25,6 +26,8 @@ namespace ir { /* * Specifies which operators should use cuDNN. */ +class Graph; + class PlacementPassBase : public Pass { protected: void ApplyImpl(ir::Graph* graph) const override; diff --git a/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc b/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc index 9f6032ffa5b..af4a2f40605 100644 --- a/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc @@ -13,10 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.h" -#include // for max +#include #include #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.h b/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.h index ae777bccebe..0be217cc748 100644 --- a/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.h +++ b/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -26,6 +27,8 @@ namespace ir { /** * Fuse Repeated FC Relu */ +class Graph; + class RepeatedFCReluFusePass : public FusePassBase { public: virtual ~RepeatedFCReluFusePass() {} diff --git a/paddle/fluid/framework/ir/runtime_context_cache_pass.h b/paddle/fluid/framework/ir/runtime_context_cache_pass.h index e4783166e0c..741adcce8d7 100644 --- a/paddle/fluid/framework/ir/runtime_context_cache_pass.h +++ b/paddle/fluid/framework/ir/runtime_context_cache_pass.h @@ -15,12 +15,15 @@ limitations under the License. 
*/ #pragma once #include + #include "paddle/fluid/framework/ir/pass.h" namespace paddle { namespace framework { namespace ir { +class Graph; + class RuntimeContextCachePass : public Pass { protected: void ApplyImpl(ir::Graph* graph) const override; diff --git a/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc b/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc index 19ec2d818a3..7daa9b5eff7 100644 --- a/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc +++ b/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc @@ -16,10 +16,7 @@ #include #include #include -#include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" -#include "paddle/fluid/framework/ir/graph_viz_pass.h" -#include "paddle/fluid/framework/lod_tensor.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.h b/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.h index d68840a5547..a7041153645 100644 --- a/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.h +++ b/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.h @@ -22,6 +22,8 @@ namespace paddle { namespace framework { namespace ir { +class Graph; + class SeqConcatFcFusePass : public FusePassBase { public: virtual ~SeqConcatFcFusePass() {} diff --git a/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc b/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc index 75ab04f1b91..d9a1348e05a 100644 --- a/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.h" #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_version_registry.h" @@ -22,6 +23,8 @@ namespace paddle { namespace framework { namespace ir { +class Node; + int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope) { GraphPatternDetector gpd; auto* 
pattern = gpd.mutable_pattern(); diff --git a/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.h b/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.h index fde9b586c85..6f623625f51 100644 --- a/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.h +++ b/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -23,6 +24,8 @@ namespace paddle { namespace framework { namespace ir { +class Graph; + class SeqConvEltAddReluFusePass : public FusePassBase { public: virtual ~SeqConvEltAddReluFusePass() {} diff --git a/paddle/fluid/framework/ir/seqpool_concat_fuse_pass.cc b/paddle/fluid/framework/ir/seqpool_concat_fuse_pass.cc index 1c220ee4d57..b6badf745c6 100644 --- a/paddle/fluid/framework/ir/seqpool_concat_fuse_pass.cc +++ b/paddle/fluid/framework/ir/seqpool_concat_fuse_pass.cc @@ -16,7 +16,14 @@ #include #include #include -#include "paddle/fluid/framework/lod_tensor.h" + +namespace paddle { +namespace framework { +namespace ir { +class Node; +} // namespace ir +} // namespace framework +} // namespace paddle #define MAX_CONCAT_INPUTS 200 diff --git a/paddle/fluid/framework/ir/seqpool_concat_fuse_pass.h b/paddle/fluid/framework/ir/seqpool_concat_fuse_pass.h index 40a9edc5e64..482fd5cb580 100644 --- a/paddle/fluid/framework/ir/seqpool_concat_fuse_pass.h +++ b/paddle/fluid/framework/ir/seqpool_concat_fuse_pass.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -37,6 +38,8 @@ namespace ir { * FusionSeqPoolConcat * | */ +class Graph; + class SeqPoolConcatFusePass : public FusePassBase { public: virtual ~SeqPoolConcatFusePass() {} diff --git a/paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass.cc 
b/paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass.cc index 8261bfc1534..d639d410466 100644 --- a/paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass.cc +++ b/paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass.cc @@ -13,16 +13,17 @@ * limitations under the License. */ #include "paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass.h" -#include #include #include #include -#include "paddle/fluid/framework/lod_tensor.h" namespace paddle { namespace framework { namespace ir { +class Graph; +class Node; + namespace { static PDNode* BuildCVMConcatPattern(PDPattern* pattern) { auto cvm_behind_x = [](Node* x) -> bool { diff --git a/paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass.h b/paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass.h index 88a41983c6b..b0a3573fb59 100644 --- a/paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass.h +++ b/paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -39,6 +40,8 @@ namespace ir { * FusionSeqPoolCVMConcat * | */ +class Graph; + class SeqPoolCVMConcatFusePass : public FusePassBase { public: virtual ~SeqPoolCVMConcatFusePass() {} diff --git a/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc b/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc index 8bdf3940928..92e995579fa 100644 --- a/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc +++ b/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc @@ -12,9 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include - -#include "paddle/fluid/framework/ir/graph_viz_pass.h" #include "paddle/fluid/framework/ir/shuffle_channel_detect_pass.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/framework/ir/shuffle_channel_detect_pass.h b/paddle/fluid/framework/ir/shuffle_channel_detect_pass.h index 008f8013efd..dc375988cdd 100644 --- a/paddle/fluid/framework/ir/shuffle_channel_detect_pass.h +++ b/paddle/fluid/framework/ir/shuffle_channel_detect_pass.h @@ -14,6 +14,7 @@ #pragma once #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" diff --git a/paddle/fluid/framework/ir/simplify_with_basic_ops_pass.cc b/paddle/fluid/framework/ir/simplify_with_basic_ops_pass.cc index 2e5c18d3352..5cc6b6171ac 100644 --- a/paddle/fluid/framework/ir/simplify_with_basic_ops_pass.cc +++ b/paddle/fluid/framework/ir/simplify_with_basic_ops_pass.cc @@ -15,7 +15,6 @@ limitations under the License. */ #include "paddle/fluid/framework/ir/simplify_with_basic_ops_pass.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" -#include "paddle/fluid/framework/ir/pass_tester_helper.h" namespace paddle { namespace framework { @@ -30,6 +29,8 @@ namespace ir { * - remove dropout_op (upscale_in_train) or * replace dropout_op with scale_op (downgrade_in_infer) when is_test is true */ +class Graph; + void SimplifyWithBasicOpsPass::ApplyImpl(Graph* graph) const { VLOG(3) << "Simplify the Graph with basic ops."; std::unordered_set del_node_set; diff --git a/paddle/fluid/framework/ir/simplify_with_basic_ops_pass.h b/paddle/fluid/framework/ir/simplify_with_basic_ops_pass.h index f5185622468..6a245c444a7 100644 --- a/paddle/fluid/framework/ir/simplify_with_basic_ops_pass.h +++ b/paddle/fluid/framework/ir/simplify_with_basic_ops_pass.h @@ -16,12 +16,16 @@ limitations under the License. 
*/ #include #include + #include "paddle/fluid/framework/ir/pass.h" namespace paddle { namespace framework { namespace ir { +class Graph; +class Node; + class SimplifyWithBasicOpsPass : public Pass { protected: void ApplyImpl(Graph* graph) const override; diff --git a/paddle/fluid/framework/ir/skip_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/skip_layernorm_fuse_pass.cc index 2e3cd16d5ce..e5f348dfeb1 100644 --- a/paddle/fluid/framework/ir/skip_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/skip_layernorm_fuse_pass.cc @@ -13,9 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/ir/skip_layernorm_fuse_pass.h" + #include #include -#include + #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/framework/ir/skip_layernorm_fuse_pass.h b/paddle/fluid/framework/ir/skip_layernorm_fuse_pass.h index 2de8d376221..3a3e5005239 100644 --- a/paddle/fluid/framework/ir/skip_layernorm_fuse_pass.h +++ b/paddle/fluid/framework/ir/skip_layernorm_fuse_pass.h @@ -29,6 +29,8 @@ namespace ir { // | | // other_op3 // | +class Graph; + class SkipLayerNormFusePass : public FusePassBase { public: virtual ~SkipLayerNormFusePass() {} diff --git a/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.h b/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.h index 56b7ec9b843..90def957df4 100644 --- a/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.h +++ b/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -26,6 +27,8 @@ namespace ir { /** * Fuse ( (A * B).^2 - (A.^2 * B.^2) ) .* scalar */ +class Graph; + class SquaredMatSubFusePass : public FusePassBase { public: virtual ~SquaredMatSubFusePass() 
{} diff --git a/paddle/fluid/framework/ir/subgraph_detector.cc b/paddle/fluid/framework/ir/subgraph_detector.cc index 7979953d7be..6ebe900e26b 100644 --- a/paddle/fluid/framework/ir/subgraph_detector.cc +++ b/paddle/fluid/framework/ir/subgraph_detector.cc @@ -13,18 +13,17 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/ir/subgraph_detector.h" -#include #include #include #include -#include "paddle/fluid/framework/ir/graph_helper.h" -#include "paddle/fluid/framework/ir/graph_pattern_detector.h" -#include "paddle/fluid/framework/ir/node.h" namespace paddle { namespace framework { namespace ir { +class Graph; +class Node; + std::pair, std::vector> ExtractInputAndOutputOfSubGraph(std::vector &graph) { // NOLINT std::unordered_set nodes(graph.begin(), graph.end()); diff --git a/paddle/fluid/framework/ir/subgraph_detector.h b/paddle/fluid/framework/ir/subgraph_detector.h index 3d279e27e6a..6bd73c758b3 100644 --- a/paddle/fluid/framework/ir/subgraph_detector.h +++ b/paddle/fluid/framework/ir/subgraph_detector.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_traits.h" #include "paddle/fluid/framework/ir/node.h" @@ -24,6 +25,9 @@ namespace paddle { namespace framework { namespace ir { +class Graph; +class Node; + const char kIsFunctionNode[] = "__is_function_node__"; const char kFunctionNodeSubGraph[] = "__function_node_sub_graph__"; const char kSubgraphSplitterMarkerAttrName[] = diff --git a/paddle/fluid/framework/ir/sync_batch_norm_pass.cc b/paddle/fluid/framework/ir/sync_batch_norm_pass.cc index 222c73761b4..3fa008c300c 100644 --- a/paddle/fluid/framework/ir/sync_batch_norm_pass.cc +++ b/paddle/fluid/framework/ir/sync_batch_norm_pass.cc @@ -12,15 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ -#include -#include -#include #include "paddle/fluid/framework/ir/pass.h" namespace paddle { namespace framework { namespace ir { +class Graph; + class SyncBatchNormPass : public Pass { protected: void ApplyImpl(ir::Graph *graph) const override { diff --git a/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc b/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc index 405cefa99eb..2db6d0230e3 100644 --- a/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc +++ b/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include #include #include #include diff --git a/paddle/fluid/framework/lod_rank_table.h b/paddle/fluid/framework/lod_rank_table.h index 8c6e8b0c66e..9a7c1285e30 100644 --- a/paddle/fluid/framework/lod_rank_table.h +++ b/paddle/fluid/framework/lod_rank_table.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include #include + #include "paddle/fluid/framework/lod_tensor.h" namespace paddle { diff --git a/paddle/fluid/framework/lod_tensor.cc b/paddle/fluid/framework/lod_tensor.cc index 40615d772e5..a044812dd31 100644 --- a/paddle/fluid/framework/lod_tensor.cc +++ b/paddle/fluid/framework/lod_tensor.cc @@ -12,19 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/fluid/framework/lod_tensor.h" #include -#include #include -#include - -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/framework/framework.pb.h" -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/framework/version.h" -#include "paddle/fluid/memory/memcpy.h" -#include "paddle/fluid/memory/memory.h" +namespace paddle { +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/lod_tensor.h b/paddle/fluid/framework/lod_tensor.h index da97efb6168..e09a628f491 100644 --- a/paddle/fluid/framework/lod_tensor.h +++ b/paddle/fluid/framework/lod_tensor.h @@ -24,6 +24,7 @@ limitations under the License. */ #endif #include + #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/tensor.h" @@ -31,6 +32,15 @@ limitations under the License. */ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" +namespace paddle { +namespace framework { +class LoDTensor; +} // namespace framework +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/lod_tensor_test.cc b/paddle/fluid/framework/lod_tensor_test.cc index c93c3f2673b..e3223e67fc9 100644 --- a/paddle/fluid/framework/lod_tensor_test.cc +++ b/paddle/fluid/framework/lod_tensor_test.cc @@ -14,9 +14,6 @@ #include #include -#include -#include -#include #include "paddle/fluid/framework/lod_tensor.h" diff --git a/paddle/fluid/framework/mixed_vector.h b/paddle/fluid/framework/mixed_vector.h index 280996d34dd..3a6e80f718d 100644 --- a/paddle/fluid/framework/mixed_vector.h +++ b/paddle/fluid/framework/mixed_vector.h @@ -20,14 +20,14 @@ limitations under the License. 
*/ #include // NOLINT #include #include + +#include "glog/logging.h" #include "paddle/fluid/framework/details/cow_ptr.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/memory/memcpy.h" -#include "glog/logging.h" - namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/mixed_vector_test.cc b/paddle/fluid/framework/mixed_vector_test.cc index 0599c8d3846..a40a3ff33fe 100644 --- a/paddle/fluid/framework/mixed_vector_test.cc +++ b/paddle/fluid/framework/mixed_vector_test.cc @@ -12,11 +12,9 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include - +#include "paddle/fluid/framework/mixed_vector.h" #include "glog/logging.h" #include "gtest/gtest.h" -#include "paddle/fluid/framework/mixed_vector.h" template using vec = paddle::framework::Vector; diff --git a/paddle/fluid/framework/naive_executor.h b/paddle/fluid/framework/naive_executor.h index 81402a650a3..75677ef5243 100644 --- a/paddle/fluid/framework/naive_executor.h +++ b/paddle/fluid/framework/naive_executor.h @@ -14,8 +14,10 @@ #pragma once +#include #include #include + #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" @@ -28,6 +30,10 @@ namespace framework { * Simple, intuitive and effective. Only single thread is supported, and * currently designed for inference. 
*/ +class LoDTensor; +class ProgramDesc; +class Scope; + class NaiveExecutor { public: explicit NaiveExecutor(const platform::Place& place) : place_(place) {} diff --git a/paddle/fluid/framework/no_need_buffer_vars_inference.h b/paddle/fluid/framework/no_need_buffer_vars_inference.h index 5d30f34090e..21ba0381fe6 100644 --- a/paddle/fluid/framework/no_need_buffer_vars_inference.h +++ b/paddle/fluid/framework/no_need_buffer_vars_inference.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/framework/type_defs.h" #include "paddle/fluid/imperative/type_defs.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/op_call_stack.h b/paddle/fluid/framework/op_call_stack.h index d48cf27285a..f633538e700 100644 --- a/paddle/fluid/framework/op_call_stack.h +++ b/paddle/fluid/framework/op_call_stack.h @@ -15,9 +15,16 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/type_defs.h" #include "paddle/fluid/platform/enforce.h" +namespace paddle { +namespace platform { +struct EnforceNotMet; +} // namespace platform +} // namespace paddle + namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/op_compatible_info.h b/paddle/fluid/framework/op_compatible_info.h index 08b5734b5bf..01fbdef99cb 100644 --- a/paddle/fluid/framework/op_compatible_info.h +++ b/paddle/fluid/framework/op_compatible_info.h @@ -14,6 +14,7 @@ #include #include + #include "paddle/fluid/framework/program_desc.h" #pragma once @@ -21,6 +22,10 @@ namespace paddle { namespace framework { +namespace proto { +class OpCompatibleMap; +} // namespace proto + enum class OpCompatibleType { compatible = 0, // support previous version DEFIN_NOT = 1, // definitely can't support previous version diff --git a/paddle/fluid/framework/op_compatible_info_test.cc b/paddle/fluid/framework/op_compatible_info_test.cc index 43959c8b2ab..98f3f5071ad 100644 --- a/paddle/fluid/framework/op_compatible_info_test.cc +++ 
b/paddle/fluid/framework/op_compatible_info_test.cc @@ -13,14 +13,17 @@ // limitations under the License. #include "paddle/fluid/framework/op_compatible_info.h" -#include + #include "gtest/gtest.h" #include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/platform/macros.h" namespace paddle { namespace framework { +namespace proto { +class OpCompatibleMap; +} // namespace proto + TEST(test_op_compatible_info, test_op_compatible) { auto comp_map = OpCompatibleMap(); comp_map.InitOpCompatibleMap(); diff --git a/paddle/fluid/framework/op_desc.h b/paddle/fluid/framework/op_desc.h index e15f0012fdc..95c33bca6c7 100644 --- a/paddle/fluid/framework/op_desc.h +++ b/paddle/fluid/framework/op_desc.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/type_defs.h" #include "paddle/fluid/framework/var_desc.h" @@ -27,6 +28,7 @@ namespace framework { class BlockDesc; class ProgramDesc; + class OpDesc { public: OpDesc() {} diff --git a/paddle/fluid/framework/op_info.cc b/paddle/fluid/framework/op_info.cc index c815e194d43..820a83586b3 100644 --- a/paddle/fluid/framework/op_info.cc +++ b/paddle/fluid/framework/op_info.cc @@ -14,8 +14,6 @@ limitations under the License. */ #include "paddle/fluid/framework/op_info.h" #include -#include -#include namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/op_info.h b/paddle/fluid/framework/op_info.h index 89b49997579..af657232e91 100644 --- a/paddle/fluid/framework/op_info.h +++ b/paddle/fluid/framework/op_info.h @@ -20,13 +20,18 @@ limitations under the License. 
*/ #include #include "paddle/fluid/framework/attribute.h" +#include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/no_need_buffer_vars_inference.h" #include "paddle/fluid/framework/type_defs.h" +#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/macros.h" namespace paddle { namespace framework { +class InferShapeContext; +class OpAttrChecker; + class InferShapeBase { public: virtual ~InferShapeBase() = default; diff --git a/paddle/fluid/framework/op_kernel_type.h b/paddle/fluid/framework/op_kernel_type.h index 9edc1a3e150..f4e60bb9b78 100644 --- a/paddle/fluid/framework/op_kernel_type.h +++ b/paddle/fluid/framework/op_kernel_type.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/library_type.h" diff --git a/paddle/fluid/framework/op_kernel_type_test.cc b/paddle/fluid/framework/op_kernel_type_test.cc index 40db85400d2..2979750fba7 100644 --- a/paddle/fluid/framework/op_kernel_type_test.cc +++ b/paddle/fluid/framework/op_kernel_type_test.cc @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/op_kernel_type.h" + #include -#include TEST(OpKernelType, ToString) { using OpKernelType = paddle::framework::OpKernelType; diff --git a/paddle/fluid/framework/op_proto_maker_test.cc b/paddle/fluid/framework/op_proto_maker_test.cc index a8030d377fd..56f940e3997 100644 --- a/paddle/fluid/framework/op_proto_maker_test.cc +++ b/paddle/fluid/framework/op_proto_maker_test.cc @@ -16,6 +16,12 @@ limitations under the License. 
*/ #include "gtest/gtest.h" +namespace paddle { +namespace platform { +struct EnforceNotMet; +} // namespace platform +} // namespace paddle + class TestAttrProtoMaker : public paddle::framework::OpProtoAndCheckerMaker { public: void Make() { diff --git a/paddle/fluid/framework/op_registry.cc b/paddle/fluid/framework/op_registry.cc index 81cfaf92e39..72dd6fa6bbd 100644 --- a/paddle/fluid/framework/op_registry.cc +++ b/paddle/fluid/framework/op_registry.cc @@ -14,8 +14,6 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" -#include - namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h index 6408fadf90a..77383386fa1 100644 --- a/paddle/fluid/framework/op_registry.h +++ b/paddle/fluid/framework/op_registry.h @@ -35,6 +35,12 @@ limitations under the License. */ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/shape_inference.h" +namespace paddle { +namespace framework { +class ExecutionContext; +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/op_version_registry_test.cc b/paddle/fluid/framework/op_version_registry_test.cc index 239dbc43578..d6b18751cef 100644 --- a/paddle/fluid/framework/op_version_registry_test.cc +++ b/paddle/fluid/framework/op_version_registry_test.cc @@ -12,7 +12,6 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include #include #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index bd52d7ffef5..d493f350e69 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -39,6 +39,15 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/variant.h" +namespace paddle { +namespace framework { +class InferShapeContext; +class OpInfo; +class Scope; +class Variable; +} // namespace framework +} // namespace paddle + DECLARE_int32(inner_op_parallelism); namespace paddle { @@ -105,8 +114,8 @@ inline std::string GradOriginalVarName(const std::string& grad_var_name) { const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var); Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var); -class OperatorBase; class ExecutionContext; +class OperatorBase; class RuntimeContext { public: diff --git a/paddle/fluid/framework/program_desc.h b/paddle/fluid/framework/program_desc.h index d5e9c755f12..5cafc9111da 100644 --- a/paddle/fluid/framework/program_desc.h +++ b/paddle/fluid/framework/program_desc.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/proto_desc.h" diff --git a/paddle/fluid/framework/program_desc_test.cc b/paddle/fluid/framework/program_desc_test.cc index 48bde2785e6..0ba1099b032 100644 --- a/paddle/fluid/framework/program_desc_test.cc +++ b/paddle/fluid/framework/program_desc_test.cc @@ -13,11 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/program_desc.h" + #include "gtest/gtest.h" -#include "paddle/fluid/framework/block_desc.h" namespace paddle { namespace framework { +class VarDesc; + TEST(ProgramDesc, copy_ctor) { ProgramDesc program; auto* global_block = program.MutableBlock(0); diff --git a/paddle/fluid/framework/prune.h b/paddle/fluid/framework/prune.h index 63e8aaad735..57f282536bf 100644 --- a/paddle/fluid/framework/prune.h +++ b/paddle/fluid/framework/prune.h @@ -19,6 +19,7 @@ limitations under the License. 
*/ #include #include #include + #include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/prune_test.cc b/paddle/fluid/framework/prune_test.cc index 12fa0c61f81..618eaba3c5b 100644 --- a/paddle/fluid/framework/prune_test.cc +++ b/paddle/fluid/framework/prune_test.cc @@ -15,13 +15,9 @@ limitations under the License. */ #include "paddle/fluid/framework/prune.h" #include -#include #include #include -#include "paddle/fluid/framework/attribute.h" -#include "paddle/fluid/framework/operator.h" - #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/program_desc.h" diff --git a/paddle/fluid/framework/pull_dense_worker.cc b/paddle/fluid/framework/pull_dense_worker.cc index 9f4c817db7d..c399c5d02eb 100644 --- a/paddle/fluid/framework/pull_dense_worker.cc +++ b/paddle/fluid/framework/pull_dense_worker.cc @@ -12,12 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/device_worker.h" -#include "paddle/fluid/framework/fleet/fleet_wrapper.h" namespace paddle { namespace framework { +class LoDTensor; +class Scope; +class Variable; + std::shared_ptr PullDenseWorker::s_instance_ = NULL; std::mutex PullDenseWorker::mutex_for_version_; std::map PullDenseWorker::last_versions_; diff --git a/paddle/fluid/framework/rw_lock_test.cc b/paddle/fluid/framework/rw_lock_test.cc index 16f9cbb6522..601b10787be 100644 --- a/paddle/fluid/framework/rw_lock_test.cc +++ b/paddle/fluid/framework/rw_lock_test.cc @@ -14,7 +14,6 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/rw_lock.h" #include -#include // NOLINT #include // NOLINT #include diff --git a/paddle/fluid/framework/save_load_util.h b/paddle/fluid/framework/save_load_util.h index 6b50c93ddbd..0b96e002e30 100644 --- a/paddle/fluid/framework/save_load_util.h +++ b/paddle/fluid/framework/save_load_util.h @@ -28,6 +28,8 @@ namespace paddle { namespace framework { +class Scope; + bool SaveStaticNameListToDisk( const std::string& file_name, const std::vector& vec_tensor_name_list, const Scope& scope); diff --git a/paddle/fluid/framework/save_load_util_test.cc b/paddle/fluid/framework/save_load_util_test.cc index 4a54e2d4668..10a34d7ce91 100644 --- a/paddle/fluid/framework/save_load_util_test.cc +++ b/paddle/fluid/framework/save_load_util_test.cc @@ -13,12 +13,9 @@ // limitations under the License. #include #include -#include -#include #include "gtest/gtest.h" #include "paddle/fluid/framework/save_load_util.h" -#include "paddle/fluid/platform/macros.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/scope.h b/paddle/fluid/framework/scope.h index db7010ecceb..922e9a9b272 100644 --- a/paddle/fluid/framework/scope.h +++ b/paddle/fluid/framework/scope.h @@ -30,6 +30,12 @@ extern "C" { #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/platform/macros.h" +namespace paddle { +namespace framework { +class Variable; +} // namespace framework +} // namespace paddle + namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/scope_pool.cc b/paddle/fluid/framework/scope_pool.cc index 4bb077a2c52..cf0b3ebcddd 100644 --- a/paddle/fluid/framework/scope_pool.cc +++ b/paddle/fluid/framework/scope_pool.cc @@ -11,10 +11,8 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#include - #include "paddle/fluid/framework/scope_pool.h" -#include "paddle/fluid/framework/threadpool.h" +#include namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/scope_pool.h b/paddle/fluid/framework/scope_pool.h index a8b468699ab..19faa9aa6a4 100644 --- a/paddle/fluid/framework/scope_pool.h +++ b/paddle/fluid/framework/scope_pool.h @@ -14,6 +14,7 @@ #pragma once +#include #include // NOLINT #include #include "paddle/fluid/framework/scope.h" @@ -21,6 +22,8 @@ namespace paddle { namespace framework { +class Scope; + class ScopePool { public: static ScopePool &Instance(); // NOLINT diff --git a/paddle/fluid/framework/scope_test.cc b/paddle/fluid/framework/scope_test.cc index 26817fc558d..a61e68279a2 100644 --- a/paddle/fluid/framework/scope_test.cc +++ b/paddle/fluid/framework/scope_test.cc @@ -13,9 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/scope.h" -#include "glog/logging.h" + #include "gtest/gtest.h" +namespace paddle { +namespace framework { +class Variable; +} // namespace framework +} // namespace paddle + using paddle::framework::Scope; using paddle::framework::Variable; diff --git a/paddle/fluid/framework/selected_rows.cc b/paddle/fluid/framework/selected_rows.cc index 1f402ea9dd3..4c30c40ad58 100644 --- a/paddle/fluid/framework/selected_rows.cc +++ b/paddle/fluid/framework/selected_rows.cc @@ -14,6 +14,12 @@ limitations under the License. */ #include "paddle/fluid/framework/selected_rows.h" +namespace paddle { +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/selected_rows.h b/paddle/fluid/framework/selected_rows.h index 285af1d5530..48353b43f56 100644 --- a/paddle/fluid/framework/selected_rows.h +++ b/paddle/fluid/framework/selected_rows.h @@ -26,9 +26,18 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/memory/memcpy.h" +namespace paddle { +namespace platform { +class DeviceContext; +class Place; +} // namespace platform +} // namespace paddle + namespace paddle { namespace framework { +class Tensor; + class SelectedRows { /* * @brief We can use the SelectedRows structure to reproduce a sparse table. diff --git a/paddle/fluid/framework/shape_inference.cc b/paddle/fluid/framework/shape_inference.cc index f5bb3f68007..02e4ce914b8 100644 --- a/paddle/fluid/framework/shape_inference.cc +++ b/paddle/fluid/framework/shape_inference.cc @@ -13,11 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/shape_inference.h" -#include -#include -#include -#include "paddle/fluid/framework/grad_op_desc_maker.h" -#include "paddle/fluid/framework/operator.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/tensor.cc b/paddle/fluid/framework/tensor.cc index 0b22bab2678..9f5d8d30c9c 100644 --- a/paddle/fluid/framework/tensor.cc +++ b/paddle/fluid/framework/tensor.cc @@ -13,7 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/framework/var_type.h" + +namespace paddle { +namespace memory { +namespace allocation { +class Allocation; +} // namespace allocation +} // namespace memory +} // namespace paddle namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h index f2ccff2c133..faecba6295d 100644 --- a/paddle/fluid/framework/tensor.h +++ b/paddle/fluid/framework/tensor.h @@ -20,6 +20,7 @@ limitations under the License. 
*/ #include #include #include + #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/framework.pb.h" @@ -28,6 +29,14 @@ limitations under the License. */ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" +namespace paddle { +namespace memory { +namespace allocation { +class Allocation; +} // namespace allocation +} // namespace memory +} // namespace paddle + namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/tensor_test.cc b/paddle/fluid/framework/tensor_test.cc index cc972dd93d0..92a29d5165c 100644 --- a/paddle/fluid/framework/tensor_test.cc +++ b/paddle/fluid/framework/tensor_test.cc @@ -13,9 +13,15 @@ // limitations under the License. #include "paddle/fluid/framework/tensor.h" + #include #include -#include "paddle/fluid/platform/float16.h" + +namespace paddle { +namespace platform { +struct float16; +} // namespace platform +} // namespace paddle namespace framework = paddle::framework; namespace platform = paddle::platform; diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc index 0e3d11b9f02..a073dbd733f 100644 --- a/paddle/fluid/framework/tensor_util.cc +++ b/paddle/fluid/framework/tensor_util.cc @@ -664,7 +664,7 @@ void TensorToStream(std::ostream& os, const Tensor& tensor, uint64_t size = tensor.numel() * framework::SizeOfType(tensor.type()); auto* data_ptr = tensor.data(); - PADDLE_ENFORCE_LT(size, std::numeric_limits::max(), + PADDLE_ENFORCE_LT(size, (std::numeric_limits::max)(), platform::errors::ResourceExhausted( "tensor size %d overflow when writing tensor", size)); if (platform::is_gpu_place(tensor.place())) { diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h index a0408dbc3db..1e811a41e90 100644 --- a/paddle/fluid/framework/tensor_util.h +++ b/paddle/fluid/framework/tensor_util.h @@ -14,6 +14,7 @@ limitations under the License. 
*/ #pragma once #include + #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/dlpack_tensor.h" #include "paddle/fluid/framework/eigen.h" @@ -30,6 +31,8 @@ namespace framework { // If ctx_place and src_place are the same, src_ctx.Wait() is added // after memory::Copy; if ctx_place and dst_place are the same, // src_ctx.Wait() is added before memory::Copy. +class Tensor; + void TensorCopy(const Tensor& src, const platform::Place& dst_place, const platform::DeviceContext& ctx, Tensor* dst); diff --git a/paddle/fluid/framework/tensor_util_test.cc b/paddle/fluid/framework/tensor_util_test.cc index ef9964b3ae7..e389cb34679 100644 --- a/paddle/fluid/framework/tensor_util_test.cc +++ b/paddle/fluid/framework/tensor_util_test.cc @@ -13,9 +13,9 @@ // limitations under the License. #include "paddle/fluid/framework/tensor_util.h" + #include #include -#include namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/threadpool.cc b/paddle/fluid/framework/threadpool.cc index 4682bfc264b..3db8f3e36b7 100644 --- a/paddle/fluid/framework/threadpool.cc +++ b/paddle/fluid/framework/threadpool.cc @@ -13,8 +13,6 @@ limitations under the License. */ #include "paddle/fluid/framework/threadpool.h" -#include -#include #include "gflags/gflags.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/threadpool.h b/paddle/fluid/framework/threadpool.h index 09528b6fc35..7fecf07475b 100644 --- a/paddle/fluid/framework/threadpool.h +++ b/paddle/fluid/framework/threadpool.h @@ -23,6 +23,7 @@ limitations under the License. 
*/ #include // NOLINT #include #include + #include "glog/logging.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/macros.h" // for DISABLE_COPY_AND_ASSIGN diff --git a/paddle/fluid/framework/trainer.h b/paddle/fluid/framework/trainer.h index 077fe751720..d041ef48e2c 100644 --- a/paddle/fluid/framework/trainer.h +++ b/paddle/fluid/framework/trainer.h @@ -14,14 +14,15 @@ limitations under the License. */ #pragma once +#include #include +#include #include #include // NOLINT #include #include // NOLINT #include -#include #include "paddle/fluid/framework/data_feed.h" #include "paddle/fluid/framework/data_set.h" #include "paddle/fluid/framework/device_worker.h" @@ -38,6 +39,15 @@ limitations under the License. */ namespace paddle { namespace framework { +class Dataset; +class LoDTensor; +class ProgramDesc; +class PullDenseWorker; +class Scope; +class VarDesc; +template +class ChannelObject; + class TrainerBase { public: TrainerBase() {} diff --git a/paddle/fluid/framework/trainer_factory.cc b/paddle/fluid/framework/trainer_factory.cc index 31ac11e78cf..15584620d86 100644 --- a/paddle/fluid/framework/trainer_factory.cc +++ b/paddle/fluid/framework/trainer_factory.cc @@ -17,11 +17,11 @@ limitations under the License. */ #include #include -#include "paddle/fluid/framework/trainer.h" - namespace paddle { namespace framework { +class TrainerBase; + typedef std::shared_ptr (*CreatetrainerFunction)(); typedef std::unordered_map trainerMap; trainerMap g_trainer_map; diff --git a/paddle/fluid/framework/trainer_factory.h b/paddle/fluid/framework/trainer_factory.h index 9c772a4f19e..3ef61c03817 100644 --- a/paddle/fluid/framework/trainer_factory.h +++ b/paddle/fluid/framework/trainer_factory.h @@ -16,11 +16,14 @@ limitations under the License. 
*/ #include #include + #include "paddle/fluid/framework/trainer.h" namespace paddle { namespace framework { +class TrainerBase; + class TrainerFactory { public: static std::string TrainerTypeList(); diff --git a/paddle/fluid/framework/transfer_scope_cache.h b/paddle/fluid/framework/transfer_scope_cache.h index 9a5d4526321..7ff6020ff2a 100644 --- a/paddle/fluid/framework/transfer_scope_cache.h +++ b/paddle/fluid/framework/transfer_scope_cache.h @@ -17,12 +17,16 @@ #include // NOLINT #include #include + #include "paddle/fluid/framework/op_kernel_type.h" #include "paddle/fluid/framework/scope.h" namespace paddle { namespace framework { +class OpKernelType; +class Scope; + std::unordered_map& global_transfer_data_cache(); std::unordered_set& global_transfer_scope_cache(); diff --git a/paddle/fluid/framework/tuple.h b/paddle/fluid/framework/tuple.h index 508ee931c6e..35b8220d453 100644 --- a/paddle/fluid/framework/tuple.h +++ b/paddle/fluid/framework/tuple.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/var_desc.h" diff --git a/paddle/fluid/framework/tuple_test.cc b/paddle/fluid/framework/tuple_test.cc index cfdd4dc56e4..9060bd3fc89 100644 --- a/paddle/fluid/framework/tuple_test.cc +++ b/paddle/fluid/framework/tuple_test.cc @@ -11,11 +11,8 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include -#include - -#include "gtest/gtest.h" #include "paddle/fluid/framework/tuple.h" +#include "gtest/gtest.h" TEST(Tuple, Make) { std::vector element_type; diff --git a/paddle/fluid/framework/unroll_array_ops.h b/paddle/fluid/framework/unroll_array_ops.h index ab176410805..a9c047cc6c6 100644 --- a/paddle/fluid/framework/unroll_array_ops.h +++ b/paddle/fluid/framework/unroll_array_ops.h @@ -15,6 +15,7 @@ #pragma once #include #include + #include "paddle/fluid/platform/hostdevice.h" namespace paddle { diff --git a/paddle/fluid/framework/unroll_array_ops_test.cc b/paddle/fluid/framework/unroll_array_ops_test.cc index be811478eec..c4fdfdb425f 100644 --- a/paddle/fluid/framework/unroll_array_ops_test.cc +++ b/paddle/fluid/framework/unroll_array_ops_test.cc @@ -13,10 +13,9 @@ // limitations under the License. #include "paddle/fluid/framework/unroll_array_ops.h" + #include -#include #include -#include namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/unused_var_check.cc b/paddle/fluid/framework/unused_var_check.cc index e7e964b4181..ac455b9ffd7 100644 --- a/paddle/fluid/framework/unused_var_check.cc +++ b/paddle/fluid/framework/unused_var_check.cc @@ -16,11 +16,11 @@ limitations under the License. */ #include #include - #include -#include #include +#include "paddle/fluid/framework/no_need_buffer_vars_inference.h" +#include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/unused_var_check.h b/paddle/fluid/framework/unused_var_check.h index 2f44a3bcde0..d78b4d928f3 100644 --- a/paddle/fluid/framework/unused_var_check.h +++ b/paddle/fluid/framework/unused_var_check.h @@ -16,7 +16,6 @@ limitations under the License. 
*/ #include #include - #include #include diff --git a/paddle/fluid/framework/var_desc.cc b/paddle/fluid/framework/var_desc.cc index 2ee0b17b64b..457c0c77b3c 100644 --- a/paddle/fluid/framework/var_desc.cc +++ b/paddle/fluid/framework/var_desc.cc @@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include - #include "paddle/fluid/framework/var_desc.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/var_desc.h b/paddle/fluid/framework/var_desc.h index 6e8be0fdd42..b37a09a17a9 100644 --- a/paddle/fluid/framework/var_desc.h +++ b/paddle/fluid/framework/var_desc.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include "glog/logging.h" #include "paddle/fluid/framework/framework.pb.h" diff --git a/paddle/fluid/framework/var_type_inference.h b/paddle/fluid/framework/var_type_inference.h index 9312ac075de..f649c9388f0 100644 --- a/paddle/fluid/framework/var_type_inference.h +++ b/paddle/fluid/framework/var_type_inference.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/type_defs.h" @@ -24,8 +25,8 @@ limitations under the License. */ namespace paddle { namespace framework { -class OpDesc; class BlockDesc; +class OpDesc; class StaticGraphVarTypeInference; // default infer var type context diff --git a/paddle/fluid/framework/var_type_inference_test.cc b/paddle/fluid/framework/var_type_inference_test.cc index dc86d58f600..5483ef01c08 100644 --- a/paddle/fluid/framework/var_type_inference_test.cc +++ b/paddle/fluid/framework/var_type_inference_test.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/framework/var_type_inference.h" + #include + #include "gtest/gtest.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" @@ -22,6 +24,8 @@ limitations under the License. */ namespace paddle { namespace framework { +class Scope; + class NOP : public OperatorBase { public: NOP(const std::string& type, const VariableNameMap& inputs, diff --git a/paddle/fluid/framework/var_type_traits.h b/paddle/fluid/framework/var_type_traits.h index 4875956096a..07387f87411 100644 --- a/paddle/fluid/framework/var_type_traits.h +++ b/paddle/fluid/framework/var_type_traits.h @@ -19,6 +19,7 @@ #include #include #include + #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/lod_tensor_array.h" @@ -43,12 +44,12 @@ class NCCLCommunicator; } // namespace platform namespace framework { -class Tensor; -class LoDTensor; -class SelectedRows; class LoDRankTable; +class LoDTensor; class ReaderHolder; class Scope; +class SelectedRows; +class Tensor; } // namespace framework namespace operators { diff --git a/paddle/fluid/framework/variable_helper.h b/paddle/fluid/framework/variable_helper.h index 01a5d09e072..6e65bc2c932 100644 --- a/paddle/fluid/framework/variable_helper.h +++ b/paddle/fluid/framework/variable_helper.h @@ -21,6 +21,8 @@ limitations under the License. */ namespace paddle { namespace framework { +class Variable; + void InitializeVariable(Variable* var, proto::VarType::Type var_type); void CopyVariable(const Variable& src_var, Variable* dst_var); diff --git a/paddle/fluid/framework/variable_test.cc b/paddle/fluid/framework/variable_test.cc index 511c9c52146..98a8ff9cf3e 100644 --- a/paddle/fluid/framework/variable_test.cc +++ b/paddle/fluid/framework/variable_test.cc @@ -12,12 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include -#include - -#include "gtest/gtest.h" -#include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/variable.h" +#include "gtest/gtest.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/version.cc b/paddle/fluid/framework/version.cc index 7ba00f52e67..92042e47259 100644 --- a/paddle/fluid/framework/version.cc +++ b/paddle/fluid/framework/version.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/version.h" -#include + #include namespace paddle { diff --git a/paddle/fluid/imperative/all_reduce.h b/paddle/fluid/imperative/all_reduce.h index 81880d0fb89..249fb4e11f1 100644 --- a/paddle/fluid/imperative/all_reduce.h +++ b/paddle/fluid/imperative/all_reduce.h @@ -25,9 +25,17 @@ #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/imperative/nccl_context.h" +namespace paddle { +namespace framework { +class Variable; +} // namespace framework +} // namespace paddle + namespace paddle { namespace imperative { +struct ParallelStrategy; + void AllReduce(const framework::Variable &src, framework::Variable *dst, const ParallelStrategy &strategy); diff --git a/paddle/fluid/imperative/amp_auto_cast.cc b/paddle/fluid/imperative/amp_auto_cast.cc index c980b014b82..d4a1519b07e 100644 --- a/paddle/fluid/imperative/amp_auto_cast.cc +++ b/paddle/fluid/imperative/amp_auto_cast.cc @@ -14,20 +14,17 @@ #include "paddle/fluid/imperative/amp_auto_cast.h" -#include #include -#include #include -#include #include -#include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/tracer.h" -#include "paddle/fluid/imperative/variable_wrapper.h" namespace paddle { namespace imperative { +class VarBase; + AmpOperators::AmpOperators() : allow_ops_(new std::unordered_set()), block_ops_(new std::unordered_set()) {} diff --git a/paddle/fluid/imperative/amp_auto_cast.h b/paddle/fluid/imperative/amp_auto_cast.h index 
d1da97e5a39..7ab876c1ce8 100644 --- a/paddle/fluid/imperative/amp_auto_cast.h +++ b/paddle/fluid/imperative/amp_auto_cast.h @@ -26,6 +26,8 @@ namespace paddle { namespace imperative { // Singleton implementation with C++ 11 +class Tracer; + class AmpOperators { public: ~AmpOperators(); diff --git a/paddle/fluid/imperative/data_loader.cc b/paddle/fluid/imperative/data_loader.cc index 3b8239e566d..a2fccf7901f 100644 --- a/paddle/fluid/imperative/data_loader.cc +++ b/paddle/fluid/imperative/data_loader.cc @@ -16,13 +16,9 @@ #include "paddle/fluid/imperative/data_loader.h" -#include #include - -#include #include #include -#include #include "paddle/fluid/memory/allocation/mmap_allocator.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/imperative/jit/op_desc_meta.cc b/paddle/fluid/imperative/jit/op_desc_meta.cc index f5c00985900..d256dafb891 100644 --- a/paddle/fluid/imperative/jit/op_desc_meta.cc +++ b/paddle/fluid/imperative/jit/op_desc_meta.cc @@ -13,7 +13,6 @@ // limitations under the License. 
#include "paddle/fluid/imperative/jit/op_desc_meta.h" -#include "paddle/fluid/framework/op_info.h" namespace paddle { namespace imperative { diff --git a/paddle/fluid/imperative/jit/op_desc_meta.h b/paddle/fluid/imperative/jit/op_desc_meta.h index 506afee6a99..c59b2c885d2 100644 --- a/paddle/fluid/imperative/jit/op_desc_meta.h +++ b/paddle/fluid/imperative/jit/op_desc_meta.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/type_defs.h" diff --git a/paddle/fluid/imperative/jit/program_desc_tracer.cc b/paddle/fluid/imperative/jit/program_desc_tracer.cc index be93a787d46..9f4cf713f7c 100644 --- a/paddle/fluid/imperative/jit/program_desc_tracer.cc +++ b/paddle/fluid/imperative/jit/program_desc_tracer.cc @@ -15,7 +15,12 @@ #include "paddle/fluid/imperative/jit/program_desc_tracer.h" #include #include -#include + +namespace paddle { +namespace imperative { +class VarBase; +} // namespace imperative +} // namespace paddle namespace paddle { namespace imperative { diff --git a/paddle/fluid/imperative/jit/program_desc_tracer.h b/paddle/fluid/imperative/jit/program_desc_tracer.h index d07acec2230..8e2e59a49ed 100644 --- a/paddle/fluid/imperative/jit/program_desc_tracer.h +++ b/paddle/fluid/imperative/jit/program_desc_tracer.h @@ -21,12 +21,19 @@ #include #include #include + #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/imperative/jit/op_desc_meta.h" #include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/type_defs.h" #include "paddle/fluid/platform/macros.h" +namespace paddle { +namespace imperative { +class VarBase; +} // namespace imperative +} // namespace paddle + namespace paddle { namespace imperative { namespace jit { diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h index 03e83301d44..d4df052a40d 100644 --- a/paddle/fluid/imperative/layer.h +++ b/paddle/fluid/imperative/layer.h @@ -24,6 +24,7 @@ #include #include #include + 
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/type_defs.h" #include "paddle/fluid/framework/var_type.h" @@ -35,10 +36,18 @@ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/macros.h" +namespace paddle { +namespace framework { +class Variable; +} // namespace framework +} // namespace paddle + namespace paddle { namespace imperative { class OpBase; +class GradOpNode; +class VariableWrapper; class ThreadSafeNameSet { public: diff --git a/paddle/fluid/imperative/prepared_operator.h b/paddle/fluid/imperative/prepared_operator.h index bf02eebdbb6..3bf032e642b 100644 --- a/paddle/fluid/imperative/prepared_operator.h +++ b/paddle/fluid/imperative/prepared_operator.h @@ -17,12 +17,23 @@ #include #include #include + #include "paddle/fluid/framework/data_transform.h" #include "paddle/fluid/framework/op_kernel_type.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/type_defs.h" +namespace paddle { +namespace framework { +class Tensor; +class Variable; +} // namespace framework +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace imperative { diff --git a/paddle/fluid/imperative/profiler.cc b/paddle/fluid/imperative/profiler.cc index 34570b3a60e..85063a68216 100644 --- a/paddle/fluid/imperative/profiler.cc +++ b/paddle/fluid/imperative/profiler.cc @@ -19,8 +19,7 @@ #endif #include #include -#include // NOLINT -#include // NOLINT +#include // NOLINT DEFINE_string( tracer_profile_fname, "xxgperf", diff --git a/paddle/fluid/imperative/tests/nccl_context_test.cc b/paddle/fluid/imperative/tests/nccl_context_test.cc index 8ce257a6c37..93ea988d638 100644 --- a/paddle/fluid/imperative/tests/nccl_context_test.cc +++ b/paddle/fluid/imperative/tests/nccl_context_test.cc @@ -13,8 +13,8 @@ // limitations under the License. 
#include "paddle/fluid/imperative/nccl_context.h" + #include "gtest/gtest.h" -#include "paddle/fluid/platform/device_context.h" namespace imperative = paddle::imperative; namespace platform = paddle::platform; diff --git a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc index ebb19fd486c..ee109310483 100644 --- a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc +++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc @@ -18,7 +18,14 @@ limitations under the License. */ #include "paddle/fluid/inference/analysis/ir_passes/subgraph_util.h" #include -#include + +namespace paddle { +namespace framework { +namespace ir { +class Node; +} // namespace ir +} // namespace framework +} // namespace paddle namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h index 1257562972e..621c631b853 100644 --- a/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h +++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_util.h @@ -27,6 +27,12 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/ir/graph_traits.h" #include "paddle/fluid/framework/ir/node.h" +namespace paddle { +namespace framework { +class BlockDesc; +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace analysis { diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h index b6b67ce8ece..bb0248008e0 100644 --- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h +++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h @@ -18,11 +18,21 @@ #include #include #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/inference/analysis/ir_passes/subgraph_util.h" #include "paddle/fluid/inference/api/paddle_analysis_config.h" +namespace paddle { +namespace framework { +namespace ir { +class Graph; +class Node; +} // namespace ir +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace analysis { diff --git a/paddle/fluid/inference/analysis/passes/adjust_cudnn_workspace_size_pass.h b/paddle/fluid/inference/analysis/passes/adjust_cudnn_workspace_size_pass.h index 65d1c545313..66d5667a37c 100644 --- a/paddle/fluid/inference/analysis/passes/adjust_cudnn_workspace_size_pass.h +++ b/paddle/fluid/inference/analysis/passes/adjust_cudnn_workspace_size_pass.h @@ -30,6 +30,8 @@ namespace analysis { * The default cudnn workspace is 4G, we set it to 64M in this pass, which * is applicable for most inference tasks. 
*/ +struct Argument; + class AdjustCudnnWorkSpacePass : public AnalysisPass { public: void RunImpl(Argument *argument) override; diff --git a/paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h b/paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h index 7fbdd88e014..bbdf8b1009a 100644 --- a/paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h +++ b/paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h @@ -32,6 +32,8 @@ namespace analysis { * So, We added the corresponding inference impl to these ops separately. * This pass replaces these ops with corresponding inference ops. */ +struct Argument; + class InferenceOpReplacePass : public AnalysisPass { public: void RunImpl(Argument *argument) override; diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.h b/paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.h index a9d58aa2f4c..a4d60e91e84 100644 --- a/paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.h +++ b/paddle/fluid/inference/analysis/passes/ir_graph_clean_pass.h @@ -16,12 +16,15 @@ #include #include + #include "paddle/fluid/inference/analysis/analysis_pass.h" namespace paddle { namespace inference { namespace analysis { +struct Argument; + class IrInferCleanGraphPass : public AnalysisPass { public: void RunImpl(Argument *argument) override; diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h b/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h index 838ebdbc9d7..613eb04497e 100644 --- a/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h +++ b/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h @@ -14,12 +14,15 @@ #pragma once +#include #include "paddle/fluid/inference/analysis/analysis_pass.h" namespace paddle { namespace inference { namespace analysis { +struct Argument; + class IrGraphToProgramPass : public AnalysisPass { public: void RunImpl(Argument *argument) override; diff --git 
a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc index f432188131e..defa0a525f6 100644 --- a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc +++ b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc @@ -13,23 +13,24 @@ // limitations under the License. #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h" + #include -#include #include #include -#include #include #include -#include #include -#include + #include "paddle/fluid/framework/ir/graph_helper.h" -#include "paddle/fluid/framework/ir/graph_pattern_detector.h" -#include "paddle/fluid/framework/ir/graph_to_program_pass.h" -#include "paddle/fluid/framework/ir/graph_traits.h" -#include "paddle/fluid/inference/analysis/helper.h" -#include "paddle/fluid/inference/api/helper.h" -#include "paddle/fluid/string/pretty_log.h" + +namespace paddle { +namespace framework { +namespace ir { +class Graph; +class Node; +} // namespace ir +} // namespace framework +} // namespace paddle namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h index 77da5d40d8d..6d20aee295b 100644 --- a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h +++ b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h @@ -18,9 +18,18 @@ #include #include #include + #include "paddle/fluid/inference/analysis/analysis_pass.h" #include "paddle/fluid/platform/port.h" +namespace paddle { +namespace framework { +namespace ir { +class Graph; +} // namespace ir +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace analysis { diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index 2184574aa1f..4abe293c930 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ 
b/paddle/fluid/inference/api/analysis_config.cc @@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/inference/api/paddle_analysis_config.h" #include "paddle/fluid/inference/api/paddle_pass_builder.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/gpu_info.h" namespace paddle { +struct MkldnnQuantizerConfig; + extern const std::vector kTRTSubgraphPasses; extern const std::vector kLiteSubgraphPasses; diff --git a/paddle/fluid/inference/api/api_impl.h b/paddle/fluid/inference/api/api_impl.h index 96b94777304..be771ac48fc 100644 --- a/paddle/fluid/inference/api/api_impl.h +++ b/paddle/fluid/inference/api/api_impl.h @@ -19,18 +19,26 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/naive_executor.h" #include "paddle/fluid/inference/api/details/reset_tensor_array.h" +#include "paddle/fluid/inference/api/paddle_api.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/io.h" #include "paddle/fluid/platform/init.h" +#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" namespace paddle { +namespace framework { +class LoDTensor; +class Scope; +} // namespace framework + class NativePaddlePredictor : public PaddlePredictor { public: explicit NativePaddlePredictor(const NativeConfig &config) diff --git a/paddle/fluid/inference/api/details/reset_tensor_array.cc b/paddle/fluid/inference/api/details/reset_tensor_array.cc index 03c2aa3fb80..ee5c10b7bf6 100644 --- a/paddle/fluid/inference/api/details/reset_tensor_array.cc +++ b/paddle/fluid/inference/api/details/reset_tensor_array.cc @@ -14,6 +14,12 @@ #include 
"paddle/fluid/inference/api/details/reset_tensor_array.h" +namespace paddle { +namespace framework { +class Scope; +} // namespace framework +} // namespace paddle + namespace paddle { namespace details { diff --git a/paddle/fluid/inference/api/details/reset_tensor_array.h b/paddle/fluid/inference/api/details/reset_tensor_array.h index 213c6891d0e..be5fe1d64f9 100644 --- a/paddle/fluid/inference/api/details/reset_tensor_array.h +++ b/paddle/fluid/inference/api/details/reset_tensor_array.h @@ -16,10 +16,18 @@ #include #include + #include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/variable.h" +namespace paddle { +namespace framework { +class LoDTensor; +class Scope; +} // namespace framework +} // namespace paddle + namespace paddle { namespace details { diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h index 014985661fd..061b83e1d1e 100644 --- a/paddle/fluid/inference/api/helper.h +++ b/paddle/fluid/inference/api/helper.h @@ -27,6 +27,7 @@ #include #include #include + #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/inference/api/mkldnn_quantizer_config.cc b/paddle/fluid/inference/api/mkldnn_quantizer_config.cc index 76cf1661f30..30c6c21ec87 100644 --- a/paddle/fluid/inference/api/mkldnn_quantizer_config.cc +++ b/paddle/fluid/inference/api/mkldnn_quantizer_config.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include + #include "paddle/fluid/inference/api/paddle_mkldnn_quantizer_config.h" namespace paddle { diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h index 90732535204..b10c290b226 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.h +++ b/paddle/fluid/inference/api/paddle_pass_builder.h @@ -17,6 +17,7 @@ #include #include #include + #include "paddle_infer_declare.h" // NOLINT /// diff --git a/paddle/fluid/inference/tensorrt/convert/activation_op.cc b/paddle/fluid/inference/tensorrt/convert/activation_op.cc index 1a428e205cb..57aeee99ba2 100644 --- a/paddle/fluid/inference/tensorrt/convert/activation_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/activation_op.cc @@ -12,8 +12,27 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/op_registry.h" +#include + +#include "glog/logging.h" +#include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +#include "paddle/fluid/inference/tensorrt/engine.h" +#include "paddle/fluid/inference/tensorrt/helper.h" +#include "paddle/fluid/platform/enforce.h" + +namespace nvinfer1 { +class IActivationLayer; +class ITensor; +} // namespace nvinfer1 +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc index f67370b3382..2f4f9320607 100644 --- a/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc @@ -12,9 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ -#include #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +namespace nvinfer1 { +class IScaleLayer; +} // namespace nvinfer1 +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { diff --git a/paddle/fluid/inference/tensorrt/convert/concat_op.cc b/paddle/fluid/inference/tensorrt/convert/concat_op.cc index 5d63aa2ace8..5ecf1923388 100644 --- a/paddle/fluid/inference/tensorrt/convert/concat_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/concat_op.cc @@ -14,6 +14,15 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { diff --git a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc index aa03bc44bd6..f582d7e0705 100644 --- a/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/conv2d_op.cc @@ -14,6 +14,15 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { diff --git a/paddle/fluid/inference/tensorrt/convert/dropout_op.cc b/paddle/fluid/inference/tensorrt/convert/dropout_op.cc index 0541d891705..d11dbc16e87 100644 --- a/paddle/fluid/inference/tensorrt/convert/dropout_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/dropout_op.cc @@ -14,6 +14,15 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { diff --git a/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc b/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc index 9fff558c583..b846b3033f6 100644 --- a/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc +++ b/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc @@ -13,6 +13,15 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/helper.h" #include "paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h" +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { diff --git a/paddle/fluid/inference/tensorrt/convert/fc_op.cc b/paddle/fluid/inference/tensorrt/convert/fc_op.cc index 0dccd3cc639..cd16ed73965 100644 --- a/paddle/fluid/inference/tensorrt/convert/fc_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/fc_op.cc @@ -14,6 +14,15 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { diff --git a/paddle/fluid/inference/tensorrt/convert/gelu_op.cc b/paddle/fluid/inference/tensorrt/convert/gelu_op.cc index 7927b6cd1bb..aad822b3354 100644 --- a/paddle/fluid/inference/tensorrt/convert/gelu_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/gelu_op.cc @@ -15,6 +15,18 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.h" +namespace nvinfer1 { +class ILayer; +} // namespace nvinfer1 +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { diff --git a/paddle/fluid/inference/tensorrt/convert/hard_sigmoid_op.cc b/paddle/fluid/inference/tensorrt/convert/hard_sigmoid_op.cc index 3b6e464a117..f2c1bafb4ae 100644 --- a/paddle/fluid/inference/tensorrt/convert/hard_sigmoid_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/hard_sigmoid_op.cc @@ -14,6 +14,15 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { diff --git a/paddle/fluid/inference/tensorrt/convert/hard_swish_op.cc b/paddle/fluid/inference/tensorrt/convert/hard_swish_op.cc index 39820068ca8..967f79a1643 100644 --- a/paddle/fluid/inference/tensorrt/convert/hard_swish_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/hard_swish_op.cc @@ -15,6 +15,18 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.h" +namespace nvinfer1 { +class ILayer; +} // namespace nvinfer1 +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { diff --git a/paddle/fluid/inference/tensorrt/convert/instance_norm_op.cc b/paddle/fluid/inference/tensorrt/convert/instance_norm_op.cc index 8a0b50f4744..d746c51c5c5 100644 --- a/paddle/fluid/inference/tensorrt/convert/instance_norm_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/instance_norm_op.cc @@ -15,6 +15,18 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h" +namespace nvinfer1 { +class IPluginLayer; +} // namespace nvinfer1 +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { diff --git a/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc b/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc index 29826909c3c..e348de9877f 100644 --- a/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc @@ -14,6 +14,18 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +namespace nvinfer1 { +class ILayer; +} // namespace nvinfer1 +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { diff --git a/paddle/fluid/inference/tensorrt/convert/mul_op.cc b/paddle/fluid/inference/tensorrt/convert/mul_op.cc index 5b6aaad4983..c99528b207b 100644 --- a/paddle/fluid/inference/tensorrt/convert/mul_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/mul_op.cc @@ -14,6 +14,15 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { diff --git a/paddle/fluid/inference/tensorrt/convert/pad_op.cc b/paddle/fluid/inference/tensorrt/convert/pad_op.cc index dd594404d33..7ddedf969fd 100644 --- a/paddle/fluid/inference/tensorrt/convert/pad_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/pad_op.cc @@ -14,6 +14,15 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { diff --git a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc index 864e440920e..c031630f36d 100644 --- a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc @@ -15,6 +15,15 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.h" +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { diff --git a/paddle/fluid/inference/tensorrt/convert/scale_op.cc b/paddle/fluid/inference/tensorrt/convert/scale_op.cc index f9a1fe41ddc..18c97890d72 100644 --- a/paddle/fluid/inference/tensorrt/convert/scale_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/scale_op.cc @@ -14,6 +14,15 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { diff --git a/paddle/fluid/inference/tensorrt/convert/shuffle_channel_op.cc b/paddle/fluid/inference/tensorrt/convert/shuffle_channel_op.cc index 326915dfff5..7090e298ddc 100644 --- a/paddle/fluid/inference/tensorrt/convert/shuffle_channel_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/shuffle_channel_op.cc @@ -14,6 +14,15 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { diff --git a/paddle/fluid/inference/tensorrt/convert/softmax_op.cc b/paddle/fluid/inference/tensorrt/convert/softmax_op.cc index 9f4a048961f..0388154427e 100644 --- a/paddle/fluid/inference/tensorrt/convert/softmax_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/softmax_op.cc @@ -14,6 +14,15 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { diff --git a/paddle/fluid/inference/tensorrt/convert/swish_op.cc b/paddle/fluid/inference/tensorrt/convert/swish_op.cc index e220d80f0d7..ab82a6578fb 100644 --- a/paddle/fluid/inference/tensorrt/convert/swish_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/swish_op.cc @@ -15,6 +15,18 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/plugin/swish_op_plugin.h" +namespace nvinfer1 { +class ILayer; +} // namespace nvinfer1 +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc index 754979f77ac..1f7ea7ea044 100644 --- a/paddle/fluid/inference/tensorrt/engine.cc +++ b/paddle/fluid/inference/tensorrt/engine.cc @@ -15,10 +15,9 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/engine.h" #include -#include #include #include -#include "paddle/fluid/inference/analysis/helper.h" + #include "paddle/fluid/inference/tensorrt/helper.h" #include "paddle/fluid/platform/enforce.h" @@ -26,6 +25,10 @@ namespace paddle { namespace inference { namespace tensorrt { +namespace plugin { +class PluginTensorRT; +} // namespace plugin + int TensorRTEngine::runtime_batch_ = 1; void TensorRTEngine::InitNetwork() { diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h index a85ed483c1d..71625210054 100644 --- a/paddle/fluid/inference/tensorrt/engine.h +++ b/paddle/fluid/inference/tensorrt/engine.h @@ -23,6 +23,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/inference/api/paddle_analysis_config.h" @@ -33,10 +34,20 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/trt_int8_calibrator.h" #include "paddle/fluid/inference/utils/singleton.h" +namespace paddle { +namespace framework { +class Tensor; +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { +namespace plugin { +class PluginTensorRT; +} // namespace plugin + using FluidDT = framework::proto::VarType_Type; using TRT_DT = nvinfer1::DataType; @@ -94,6 +105,7 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector& shape, std::string input, } // NOLINT class TRTInt8Calibrator; + /* * TensorRT Engine. * diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 31128ba8c5d..23aacedd693 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -14,6 +14,12 @@ #include "paddle/fluid/inference/tensorrt/op_teller.h" +namespace paddle { +namespace framework { +class OpDesc; +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { diff --git a/paddle/fluid/inference/tensorrt/op_teller.h b/paddle/fluid/inference/tensorrt/op_teller.h index 76784c7445e..9113525a5c9 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.h +++ b/paddle/fluid/inference/tensorrt/op_teller.h @@ -17,9 +17,16 @@ #include #include #include + #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/inference/tensorrt/engine.h" +namespace paddle { +namespace framework { +class OpDesc; +} // namespace framework +} // namespace paddle + namespace paddle { namespace inference { namespace tensorrt { diff --git a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h index ed825801fc4..076dfbcf8f0 100644 --- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h +++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h @@ -26,6 +26,7 @@ #include 
"paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h" #include "paddle/fluid/inference/utils/singleton.h" #include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/variant.h" namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/tensorrt/test_engine.cc b/paddle/fluid/inference/tensorrt/test_engine.cc index 72962c733ec..7c763858bb2 100644 --- a/paddle/fluid/inference/tensorrt/test_engine.cc +++ b/paddle/fluid/inference/tensorrt/test_engine.cc @@ -12,13 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include -#include #include #include #include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/inference/tensorrt/engine.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h index 5815bc9a146..b4b7ee50dc3 100644 --- a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h +++ b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h @@ -23,6 +23,7 @@ #include #include #include + #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/inference/tensorrt/engine.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/inference/utils/benchmark.cc b/paddle/fluid/inference/utils/benchmark.cc index 074a397e323..8c76a03d298 100644 --- a/paddle/fluid/inference/utils/benchmark.cc +++ b/paddle/fluid/inference/utils/benchmark.cc @@ -13,7 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/inference/utils/benchmark.h" -#include + #include "paddle/fluid/platform/enforce.h" namespace paddle { diff --git a/paddle/fluid/inference/utils/io_utils.h b/paddle/fluid/inference/utils/io_utils.h index 853aba168b5..de2c7b26d33 100644 --- a/paddle/fluid/inference/utils/io_utils.h +++ b/paddle/fluid/inference/utils/io_utils.h @@ -16,9 +16,14 @@ #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/inference/api/paddle_api.h" +namespace paddle { +struct PaddleTensor; +} // namespace paddle + namespace paddle { namespace inference { diff --git a/paddle/fluid/memory/allocation/aligned_allocator.h b/paddle/fluid/memory/allocation/aligned_allocator.h index 71250766034..6fef5cae8d6 100644 --- a/paddle/fluid/memory/allocation/aligned_allocator.h +++ b/paddle/fluid/memory/allocation/aligned_allocator.h @@ -15,6 +15,7 @@ #pragma once #include #include + #include "paddle/fluid/memory/allocation/allocator.h" namespace paddle { diff --git a/paddle/fluid/memory/allocation/allocator.h b/paddle/fluid/memory/allocation/allocator.h index 379c8d00960..e54748a5367 100644 --- a/paddle/fluid/memory/allocation/allocator.h +++ b/paddle/fluid/memory/allocation/allocator.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/framework/inlined_vector.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/memory/allocation/allocator_facade_abs_flags_test.cc b/paddle/fluid/memory/allocation/allocator_facade_abs_flags_test.cc index 3e10be037bd..00299911162 100644 --- a/paddle/fluid/memory/allocation/allocator_facade_abs_flags_test.cc +++ b/paddle/fluid/memory/allocation/allocator_facade_abs_flags_test.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/memory/allocation/allocator_facade.h" -#include #include +#include "paddle/fluid/memory/allocation/allocator_facade.h" + #ifdef PADDLE_WITH_CUDA DECLARE_double(fraction_of_gpu_memory_to_use); DECLARE_double(fraction_of_cuda_pinned_memory_to_use); diff --git a/paddle/fluid/memory/allocation/allocator_facade_frac_flags_test.cc b/paddle/fluid/memory/allocation/allocator_facade_frac_flags_test.cc index 3748805b1ce..1e793d1617a 100644 --- a/paddle/fluid/memory/allocation/allocator_facade_frac_flags_test.cc +++ b/paddle/fluid/memory/allocation/allocator_facade_frac_flags_test.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/memory/allocation/allocator_facade.h" -#include #include +#include "paddle/fluid/memory/allocation/allocator_facade.h" + #ifdef PADDLE_WITH_CUDA DECLARE_double(fraction_of_gpu_memory_to_use); DECLARE_double(fraction_of_cuda_pinned_memory_to_use); diff --git a/paddle/fluid/memory/allocation/allocator_strategy.cc b/paddle/fluid/memory/allocation/allocator_strategy.cc index e1c7b243bec..518b31e9430 100644 --- a/paddle/fluid/memory/allocation/allocator_strategy.cc +++ b/paddle/fluid/memory/allocation/allocator_strategy.cc @@ -13,8 +13,8 @@ // limitations under the License. 
#include "paddle/fluid/memory/allocation/allocator_strategy.h" + #include "gflags/gflags.h" -#include "glog/logging.h" #include "paddle/fluid/platform/enforce.h" DECLARE_string(allocator_strategy); diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h index 27257883d55..cbc126264ac 100644 --- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h +++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h @@ -19,6 +19,7 @@ #include #include // NOLINT #include + #include "paddle/fluid/memory/allocation/allocator.h" namespace paddle { diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc index 8865bf0b0db..685248a88f7 100644 --- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc +++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc @@ -13,8 +13,10 @@ // limitations under the License. #include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h" + #include #include + #include "gtest/gtest.h" DECLARE_bool(free_idle_chunk); diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.cc b/paddle/fluid/memory/allocation/best_fit_allocator.cc index 957dac4d03e..2b8d2164f68 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/best_fit_allocator.cc @@ -13,11 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/memory/allocation/best_fit_allocator.h" - -#include -#include -#include -#include +#include namespace paddle { namespace memory { diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.h b/paddle/fluid/memory/allocation/best_fit_allocator.h index 64a552e4fd2..a6015417b12 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator.h +++ b/paddle/fluid/memory/allocation/best_fit_allocator.h @@ -16,8 +16,15 @@ #include #include #include + #include "paddle/fluid/memory/allocation/allocator.h" +namespace paddle { +namespace platform { +class Place; +} // namespace platform +} // namespace paddle + namespace paddle { namespace memory { namespace allocation { diff --git a/paddle/fluid/memory/allocation/buffered_allocator.cc b/paddle/fluid/memory/allocation/buffered_allocator.cc index 7eed81a712a..d463ad1f5eb 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator.cc +++ b/paddle/fluid/memory/allocation/buffered_allocator.cc @@ -13,9 +13,6 @@ // limitations under the License. 
#include "paddle/fluid/memory/allocation/buffered_allocator.h" - -#include -#include #include namespace paddle { diff --git a/paddle/fluid/memory/allocation/buffered_allocator.h b/paddle/fluid/memory/allocation/buffered_allocator.h index fd0996f7748..5e1733bd839 100644 --- a/paddle/fluid/memory/allocation/buffered_allocator.h +++ b/paddle/fluid/memory/allocation/buffered_allocator.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/platform/lock_guard_ptr.h" diff --git a/paddle/fluid/memory/allocation/cpu_allocator.cc b/paddle/fluid/memory/allocation/cpu_allocator.cc index b096fbc112c..407f0f25935 100644 --- a/paddle/fluid/memory/allocation/cpu_allocator.cc +++ b/paddle/fluid/memory/allocation/cpu_allocator.cc @@ -16,8 +16,6 @@ #include -#include - namespace paddle { namespace memory { namespace allocation { diff --git a/paddle/fluid/memory/allocation/locked_allocator.h b/paddle/fluid/memory/allocation/locked_allocator.h index 4af77e6e057..1b8418bc849 100644 --- a/paddle/fluid/memory/allocation/locked_allocator.h +++ b/paddle/fluid/memory/allocation/locked_allocator.h @@ -15,6 +15,7 @@ #include #include // NOLINT #include // NOLINT + #include "paddle/fluid/memory/allocation/allocator.h" namespace paddle { diff --git a/paddle/fluid/memory/allocation/mmap_allocator.cc b/paddle/fluid/memory/allocation/mmap_allocator.cc index 0ef084bafd0..77e8d9943d0 100644 --- a/paddle/fluid/memory/allocation/mmap_allocator.cc +++ b/paddle/fluid/memory/allocation/mmap_allocator.cc @@ -19,13 +19,8 @@ #include #include #include -#include -#include -#include - #include #include -#include namespace paddle { namespace memory { diff --git a/paddle/fluid/memory/allocation/mmap_allocator_test.cc b/paddle/fluid/memory/allocation/mmap_allocator_test.cc index 5b66920be2a..bcb02e04792 100644 --- a/paddle/fluid/memory/allocation/mmap_allocator_test.cc +++ b/paddle/fluid/memory/allocation/mmap_allocator_test.cc @@ -16,8 +16,6 
@@ #include "paddle/fluid/memory/allocation/mmap_allocator.h" -#include - #include "gtest/gtest.h" namespace paddle { diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator.h b/paddle/fluid/memory/allocation/naive_best_fit_allocator.h index 913d583099c..4cf1bd6123e 100644 --- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.h +++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.h @@ -18,8 +18,10 @@ #include #include #include + #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/platform/place.h" + namespace paddle { namespace memory { namespace allocation { diff --git a/paddle/fluid/memory/allocation/pinned_allocator.cc b/paddle/fluid/memory/allocation/pinned_allocator.cc index 3ea2ecf3538..42dd50af729 100644 --- a/paddle/fluid/memory/allocation/pinned_allocator.cc +++ b/paddle/fluid/memory/allocation/pinned_allocator.cc @@ -14,9 +14,6 @@ #include "paddle/fluid/memory/allocation/pinned_allocator.h" -#include -#include - namespace paddle { namespace memory { namespace allocation { diff --git a/paddle/fluid/memory/allocation/thread_local_allocator.h b/paddle/fluid/memory/allocation/thread_local_allocator.h index bc07ad0c4dc..10ca4b828a4 100644 --- a/paddle/fluid/memory/allocation/thread_local_allocator.h +++ b/paddle/fluid/memory/allocation/thread_local_allocator.h @@ -16,6 +16,7 @@ #include #include + #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/detail/buddy_allocator.h" #include "paddle/fluid/memory/detail/system_allocator.h" diff --git a/paddle/fluid/memory/detail/buddy_allocator.cc b/paddle/fluid/memory/detail/buddy_allocator.cc index faa87f1f01a..6ac99744d79 100644 --- a/paddle/fluid/memory/detail/buddy_allocator.cc +++ b/paddle/fluid/memory/detail/buddy_allocator.cc @@ -13,10 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/memory/detail/buddy_allocator.h" - #include #include - #include "glog/logging.h" #ifdef PADDLE_WITH_CUDA diff --git a/paddle/fluid/memory/detail/buddy_allocator_test.cc b/paddle/fluid/memory/detail/buddy_allocator_test.cc index 780126f57c5..1722acd10aa 100644 --- a/paddle/fluid/memory/detail/buddy_allocator_test.cc +++ b/paddle/fluid/memory/detail/buddy_allocator_test.cc @@ -21,12 +21,9 @@ limitations under the License. */ #endif #include "gflags/gflags.h" #include "gtest/gtest.h" -#include "paddle/fluid/memory/detail/system_allocator.h" #include "paddle/fluid/platform/gpu_info.h" #ifdef PADDLE_WITH_CUDA -#include - #include #include diff --git a/paddle/fluid/memory/detail/system_allocator_test.cc b/paddle/fluid/memory/detail/system_allocator_test.cc index 34bb40d549d..ea4897494f7 100644 --- a/paddle/fluid/memory/detail/system_allocator_test.cc +++ b/paddle/fluid/memory/detail/system_allocator_test.cc @@ -15,7 +15,6 @@ limitations under the License. */ #include "paddle/fluid/memory/detail/system_allocator.h" #include -#include #include "gflags/gflags.h" #include "gtest/gtest.h" diff --git a/paddle/fluid/memory/malloc.h b/paddle/fluid/memory/malloc.h index 9ba572acaca..73487795f75 100644 --- a/paddle/fluid/memory/malloc.h +++ b/paddle/fluid/memory/malloc.h @@ -15,8 +15,10 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/platform/place.h" + namespace paddle { namespace platform { diff --git a/paddle/fluid/operators/activation_cudnn_op.cu.cc b/paddle/fluid/operators/activation_cudnn_op.cu.cc index 33d8fb828f8..1903b9e30d8 100644 --- a/paddle/fluid/operators/activation_cudnn_op.cu.cc +++ b/paddle/fluid/operators/activation_cudnn_op.cu.cc @@ -12,13 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include -#include -#include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/platform/cudnn_desc.h" +namespace paddle { +namespace platform { +struct CUDAPlace; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { using framework::Tensor; diff --git a/paddle/fluid/operators/array_to_lod_tensor_op.cc b/paddle/fluid/operators/array_to_lod_tensor_op.cc index ea1a93b5826..89475e3a2ec 100644 --- a/paddle/fluid/operators/array_to_lod_tensor_op.cc +++ b/paddle/fluid/operators/array_to_lod_tensor_op.cc @@ -12,20 +12,27 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include -#include -#include "paddle/fluid/framework/lod_rank_table.h" -#include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/device_context.h" +namespace paddle { +namespace framework { +class OpDesc; +class Scope; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle + namespace paddle { namespace operators { using LoD = framework::LoD; struct ArrayToLoDFunctor; + template struct ArrayToLoDFunctorImpl { const ArrayToLoDFunctor *prev_functor_; diff --git a/paddle/fluid/operators/assert_op.cc b/paddle/fluid/operators/assert_op.cc index da0e5fda636..3e4250389fc 100644 --- a/paddle/fluid/operators/assert_op.cc +++ b/paddle/fluid/operators/assert_op.cc @@ -13,10 +13,24 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/operators/controlflow/while_op_helper.h" #include "paddle/fluid/operators/tensor_formatter.h" +namespace paddle { +namespace framework { +class InferShapeContext; +class LoDTensor; +class OpDesc; +class Scope; +class Variable; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle + const char kCond[] = "Cond"; const char kData[] = "Data"; const char kSummarize[] = "summarize"; diff --git a/paddle/fluid/operators/assign_op.cc b/paddle/fluid/operators/assign_op.cc index f8c1216e972..e5bceae1c95 100644 --- a/paddle/fluid/operators/assign_op.cc +++ b/paddle/fluid/operators/assign_op.cc @@ -14,9 +14,23 @@ limitations under the License. */ #include "paddle/fluid/operators/assign_op.h" -#include #include +namespace paddle { +namespace framework { +class OpDesc; +class Variable; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +struct CPUPlace; +struct CUDAPlace; +struct float16; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/assign_op.h b/paddle/fluid/operators/assign_op.h index c2154f78bbe..ed4ba24a74b 100644 --- a/paddle/fluid/operators/assign_op.h +++ b/paddle/fluid/operators/assign_op.h @@ -19,6 +19,13 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/platform/device_context.h" +namespace paddle { +namespace framework { +class LoDTensor; +class Variable; +} // namespace framework +} // namespace paddle + namespace paddle { namespace operators { class AssignFunctor { diff --git a/paddle/fluid/operators/assign_op_test.cc b/paddle/fluid/operators/assign_op_test.cc index 58f360ad605..f0ec04a1f20 100644 --- a/paddle/fluid/operators/assign_op_test.cc +++ b/paddle/fluid/operators/assign_op_test.cc @@ -14,8 +14,6 @@ limitations under the License. */ #include "paddle/fluid/operators/assign_op.h" #include -#include -#include #include #include "paddle/fluid/framework/ddim.h" diff --git a/paddle/fluid/operators/assign_value_op.cc b/paddle/fluid/operators/assign_value_op.cc index a35e5a80a9e..1589f9e8911 100644 --- a/paddle/fluid/operators/assign_value_op.cc +++ b/paddle/fluid/operators/assign_value_op.cc @@ -13,8 +13,23 @@ // limitations under the License. #include "paddle/fluid/operators/assign_value_op.h" + #include -#include + +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +struct CPUPlace; +} // namespace platform +} // namespace paddle namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/assign_value_op.h b/paddle/fluid/operators/assign_value_op.h index 1418d96b67b..6c2e5b9ad68 100644 --- a/paddle/fluid/operators/assign_value_op.h +++ b/paddle/fluid/operators/assign_value_op.h @@ -16,6 +16,7 @@ #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/beam_search_decode_op.cc b/paddle/fluid/operators/beam_search_decode_op.cc index c177dad8028..3cb3f1d48bf 100644 --- a/paddle/fluid/operators/beam_search_decode_op.cc 
+++ b/paddle/fluid/operators/beam_search_decode_op.cc @@ -12,12 +12,24 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include #include #include "paddle/fluid/operators/beam_search_decode_op.h" #include "paddle/fluid/platform/device_context.h" +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +class Scope; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/collective/c_allreduce_max_op.cc b/paddle/fluid/operators/collective/c_allreduce_max_op.cc index bcb529f1570..835b49e57bc 100644 --- a/paddle/fluid/operators/collective/c_allreduce_max_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_max_op.cc @@ -14,6 +14,21 @@ limitations under the License. */ #include "paddle/fluid/operators/collective/c_allreduce_op.h" +namespace paddle { +namespace framework { +class OpDesc; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +struct CPUPlace; +struct float16; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc index 34054103aa0..ec8d6518195 100644 --- a/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_max_op.cu.cc @@ -14,6 +14,13 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/collective/c_allreduce_op.h" +namespace paddle { +namespace platform { +struct CUDAPlace; +struct float16; +} // namespace platform +} // namespace paddle + namespace ops = paddle::operators; namespace plat = paddle::platform; diff --git a/paddle/fluid/operators/collective/c_allreduce_min_op.cc b/paddle/fluid/operators/collective/c_allreduce_min_op.cc index 9d27a9ceb30..efc19659c83 100644 --- a/paddle/fluid/operators/collective/c_allreduce_min_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_min_op.cc @@ -14,6 +14,21 @@ limitations under the License. */ #include "paddle/fluid/operators/collective/c_allreduce_op.h" +namespace paddle { +namespace framework { +class OpDesc; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +struct CPUPlace; +struct float16; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/collective/c_allreduce_min_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_min_op.cu.cc index 4e8b6f9d0a9..7935a1f722e 100644 --- a/paddle/fluid/operators/collective/c_allreduce_min_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_min_op.cu.cc @@ -14,6 +14,13 @@ limitations under the License. */ #include "paddle/fluid/operators/collective/c_allreduce_op.h" +namespace paddle { +namespace platform { +struct CUDAPlace; +struct float16; +} // namespace platform +} // namespace paddle + namespace ops = paddle::operators; namespace plat = paddle::platform; diff --git a/paddle/fluid/operators/collective/c_allreduce_prod_op.cc b/paddle/fluid/operators/collective/c_allreduce_prod_op.cc index 3cfb1723f18..5ab07ef026b 100644 --- a/paddle/fluid/operators/collective/c_allreduce_prod_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_prod_op.cc @@ -14,6 +14,21 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/collective/c_allreduce_op.h" +namespace paddle { +namespace framework { +class OpDesc; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +struct CPUPlace; +struct float16; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/collective/c_allreduce_prod_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_prod_op.cu.cc index 61f76c178d0..1a78427cd19 100644 --- a/paddle/fluid/operators/collective/c_allreduce_prod_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_prod_op.cu.cc @@ -14,6 +14,13 @@ limitations under the License. */ #include "paddle/fluid/operators/collective/c_allreduce_op.h" +namespace paddle { +namespace platform { +struct CUDAPlace; +struct float16; +} // namespace platform +} // namespace paddle + namespace ops = paddle::operators; namespace plat = paddle::platform; diff --git a/paddle/fluid/operators/collective/c_allreduce_sum_op.cc b/paddle/fluid/operators/collective/c_allreduce_sum_op.cc index c9ed8b67647..68061e6ae6b 100644 --- a/paddle/fluid/operators/collective/c_allreduce_sum_op.cc +++ b/paddle/fluid/operators/collective/c_allreduce_sum_op.cc @@ -14,6 +14,19 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/collective/c_allreduce_op.h" +namespace paddle { +namespace framework { +class OpDesc; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +struct CPUPlace; +struct float16; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc b/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc index 8fe7fce21e4..06e90cdff80 100644 --- a/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc +++ b/paddle/fluid/operators/collective/c_allreduce_sum_op.cu.cc @@ -14,6 +14,13 @@ limitations under the License. */ #include "paddle/fluid/operators/collective/c_allreduce_op.h" +namespace paddle { +namespace platform { +struct CUDAPlace; +struct float16; +} // namespace platform +} // namespace paddle + namespace ops = paddle::operators; namespace plat = paddle::platform; diff --git a/paddle/fluid/operators/collective/c_comm_init_op.cc b/paddle/fluid/operators/collective/c_comm_init_op.cc index a3bf1f4dfb1..ccad96320a7 100644 --- a/paddle/fluid/operators/collective/c_comm_init_op.cc +++ b/paddle/fluid/operators/collective/c_comm_init_op.cc @@ -14,19 +14,17 @@ limitations under the License. 
*/ #if defined(PADDLE_WITH_NCCL) #include #endif -#include -#include #include -#include "paddle/fluid/framework/executor.h" -#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/threadpool.h" -#include "paddle/fluid/operators/distributed/distributed.h" -#include "paddle/fluid/operators/distributed/request_handler_impl.h" + +namespace paddle { +namespace framework { +class Scope; +} // namespace framework +} // namespace paddle #if defined(PADDLE_WITH_NCCL) #include "paddle/fluid/platform/collective_helper.h" -#include "paddle/fluid/platform/nccl_helper.h" #endif namespace paddle { diff --git a/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc b/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc index e2b09be5a9d..2822129b198 100644 --- a/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc +++ b/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc @@ -11,25 +11,23 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ - -#if defined(PADDLE_WITH_NCCL) -#include -#endif - -#include -#include #include +#include "glog/logging.h" #include "paddle/fluid/framework/executor.h" -#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/threadpool.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/framework/var_type_traits.h" #include "paddle/fluid/operators/distributed/distributed.h" +#include "paddle/fluid/operators/distributed/request_handler.h" #include "paddle/fluid/operators/distributed/request_handler_impl.h" - -#if defined(PADDLE_WITH_NCCL) -#include "paddle/fluid/platform/nccl_helper.h" -#endif +#include "paddle/fluid/operators/distributed/rpc_client.h" +#include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/place.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/collective/c_reduce_max_op.cc b/paddle/fluid/operators/collective/c_reduce_max_op.cc index 42535187768..41a07f94399 100644 --- a/paddle/fluid/operators/collective/c_reduce_max_op.cc +++ b/paddle/fluid/operators/collective/c_reduce_max_op.cc @@ -14,6 +14,21 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/collective/c_reduce_op.h" +namespace paddle { +namespace framework { +class OpDesc; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +struct CPUPlace; +struct float16; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/collective/c_reduce_max_op.cu.cc b/paddle/fluid/operators/collective/c_reduce_max_op.cu.cc index 7e260346b4b..e03da37360f 100644 --- a/paddle/fluid/operators/collective/c_reduce_max_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reduce_max_op.cu.cc @@ -14,6 +14,13 @@ limitations under the License. */ #include "paddle/fluid/operators/collective/c_reduce_op.h" +namespace paddle { +namespace platform { +struct CUDAPlace; +struct float16; +} // namespace platform +} // namespace paddle + namespace ops = paddle::operators; namespace plat = paddle::platform; diff --git a/paddle/fluid/operators/collective/c_reduce_min_op.cc b/paddle/fluid/operators/collective/c_reduce_min_op.cc index 8e849641e63..77bb96347f9 100644 --- a/paddle/fluid/operators/collective/c_reduce_min_op.cc +++ b/paddle/fluid/operators/collective/c_reduce_min_op.cc @@ -14,6 +14,21 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/collective/c_reduce_op.h" +namespace paddle { +namespace framework { +class OpDesc; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +struct CPUPlace; +struct float16; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/collective/c_reduce_min_op.cu.cc b/paddle/fluid/operators/collective/c_reduce_min_op.cu.cc index 77a75ed0b7a..83f7fce1ec6 100644 --- a/paddle/fluid/operators/collective/c_reduce_min_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reduce_min_op.cu.cc @@ -14,6 +14,13 @@ limitations under the License. */ #include "paddle/fluid/operators/collective/c_reduce_op.h" +namespace paddle { +namespace platform { +struct CUDAPlace; +struct float16; +} // namespace platform +} // namespace paddle + namespace ops = paddle::operators; namespace plat = paddle::platform; diff --git a/paddle/fluid/operators/collective/c_reduce_prod_op.cc b/paddle/fluid/operators/collective/c_reduce_prod_op.cc index 64935df856e..f6c1c5d50e8 100644 --- a/paddle/fluid/operators/collective/c_reduce_prod_op.cc +++ b/paddle/fluid/operators/collective/c_reduce_prod_op.cc @@ -14,6 +14,21 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/collective/c_reduce_op.h" +namespace paddle { +namespace framework { +class OpDesc; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +struct CPUPlace; +struct float16; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/collective/c_reduce_prod_op.cu.cc b/paddle/fluid/operators/collective/c_reduce_prod_op.cu.cc index 07e431f7bc8..83db107b36f 100644 --- a/paddle/fluid/operators/collective/c_reduce_prod_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reduce_prod_op.cu.cc @@ -14,6 +14,13 @@ limitations under the License. */ #include "paddle/fluid/operators/collective/c_reduce_op.h" +namespace paddle { +namespace platform { +struct CUDAPlace; +struct float16; +} // namespace platform +} // namespace paddle + namespace ops = paddle::operators; namespace plat = paddle::platform; diff --git a/paddle/fluid/operators/collective/c_reduce_sum_op.cc b/paddle/fluid/operators/collective/c_reduce_sum_op.cc index 3e20cee7e18..e59ec85fefd 100644 --- a/paddle/fluid/operators/collective/c_reduce_sum_op.cc +++ b/paddle/fluid/operators/collective/c_reduce_sum_op.cc @@ -14,6 +14,21 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/collective/c_reduce_op.h" +namespace paddle { +namespace framework { +class OpDesc; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +struct CPUPlace; +struct float16; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/collective/c_reduce_sum_op.cu.cc b/paddle/fluid/operators/collective/c_reduce_sum_op.cu.cc index d9826422c16..39c8716a92a 100644 --- a/paddle/fluid/operators/collective/c_reduce_sum_op.cu.cc +++ b/paddle/fluid/operators/collective/c_reduce_sum_op.cu.cc @@ -14,6 +14,13 @@ limitations under the License. */ #include "paddle/fluid/operators/collective/c_reduce_op.h" +namespace paddle { +namespace platform { +struct CUDAPlace; +struct float16; +} // namespace platform +} // namespace paddle + namespace ops = paddle::operators; namespace plat = paddle::platform; diff --git a/paddle/fluid/operators/collective/c_sync_calc_stream_op.cc b/paddle/fluid/operators/collective/c_sync_calc_stream_op.cc index 64b60165722..00f366e6212 100644 --- a/paddle/fluid/operators/collective/c_sync_calc_stream_op.cc +++ b/paddle/fluid/operators/collective/c_sync_calc_stream_op.cc @@ -11,19 +11,15 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ - -#if defined(PADDLE_WITH_NCCL) -#include -#endif - #include -#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" -#if defined(PADDLE_WITH_NCCL) -#include "paddle/fluid/platform/collective_helper.h" -#endif +namespace paddle { +namespace framework { +class Scope; +} // namespace framework +} // namespace paddle namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc b/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc index 5405ea70ef6..d8617492fb1 100644 --- a/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc +++ b/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc @@ -11,18 +11,17 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#if defined(PADDLE_WITH_NCCL) -#include -#endif - #include -#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace framework { +class Scope; +} // namespace framework +} // namespace paddle #if defined(PADDLE_WITH_NCCL) #include "paddle/fluid/platform/collective_helper.h" -#include "paddle/fluid/platform/nccl_helper.h" #endif namespace paddle { diff --git a/paddle/fluid/operators/common_infer_shape_functions.cc b/paddle/fluid/operators/common_infer_shape_functions.cc index 22b212fc1b9..ce622d7501f 100644 --- a/paddle/fluid/operators/common_infer_shape_functions.cc +++ b/paddle/fluid/operators/common_infer_shape_functions.cc @@ -13,10 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/common_infer_shape_functions.h" - #include #include +namespace paddle { +namespace framework { +class InferShapeContext; +} // namespace framework +} // namespace paddle + // This file almostly contains all the infershape functions that are used in // operators. diff --git a/paddle/fluid/operators/common_infer_shape_functions.h b/paddle/fluid/operators/common_infer_shape_functions.h index 2cb9eab2865..922d5262abc 100644 --- a/paddle/fluid/operators/common_infer_shape_functions.h +++ b/paddle/fluid/operators/common_infer_shape_functions.h @@ -17,6 +17,12 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" +namespace paddle { +namespace framework { +class InferShapeContext; +} // namespace framework +} // namespace paddle + // This file almostly contains all the infershape functions that are used in // operators. diff --git a/paddle/fluid/operators/controlflow/conditional_block_infer_op.cc b/paddle/fluid/operators/controlflow/conditional_block_infer_op.cc index aa31b887562..62019be26cd 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_infer_op.cc +++ b/paddle/fluid/operators/controlflow/conditional_block_infer_op.cc @@ -14,6 +14,18 @@ limitations under the License. */ #include "paddle/fluid/operators/controlflow/conditional_block_op.h" +namespace paddle { +namespace framework { +class OpDesc; +class Scope; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/controlflow/conditional_block_op.h b/paddle/fluid/operators/controlflow/conditional_block_op.h index 659e098c6dd..c8ab2c91e91 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_op.h +++ b/paddle/fluid/operators/controlflow/conditional_block_op.h @@ -18,6 +18,7 @@ limitations under the License. 
*/ #include #include #include + #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/var_type.h" diff --git a/paddle/fluid/operators/controlflow/conditional_block_op_helper.cc b/paddle/fluid/operators/controlflow/conditional_block_op_helper.cc index 155a5dbfec3..00b86121c0d 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_op_helper.cc +++ b/paddle/fluid/operators/controlflow/conditional_block_op_helper.cc @@ -13,12 +13,19 @@ // limitations under the License. #include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h" + #include #include #include -#include + #include "paddle/fluid/operators/controlflow/op_variant.h" +namespace paddle { +namespace framework { +class ProgramDesc; +} // namespace framework +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/controlflow/conditional_block_op_helper.h b/paddle/fluid/operators/controlflow/conditional_block_op_helper.h index f7dfba6f364..abaaa897606 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_op_helper.h +++ b/paddle/fluid/operators/controlflow/conditional_block_op_helper.h @@ -16,9 +16,16 @@ #include #include + #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/controlflow/conditional_block_op.h" +namespace paddle { +namespace framework { +class ProgramDesc; +} // namespace framework +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/controlflow/conditional_block_op_test.cc b/paddle/fluid/operators/controlflow/conditional_block_op_test.cc index a5ca4a289de..068d853f1cc 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_op_test.cc +++ b/paddle/fluid/operators/controlflow/conditional_block_op_test.cc @@ -13,14 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/controlflow/conditional_block_op.h" -#include -#include -#include + #include "gtest/gtest.h" -#include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/framework/var_type.h" USE_NO_KERNEL_OP(conditional_block); USE_NO_KERNEL_OP(conditional_block_grad); diff --git a/paddle/fluid/operators/controlflow/feed_op.cc b/paddle/fluid/operators/controlflow/feed_op.cc index 088413ce223..9597dd25ec5 100644 --- a/paddle/fluid/operators/controlflow/feed_op.cc +++ b/paddle/fluid/operators/controlflow/feed_op.cc @@ -12,10 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" +namespace paddle { +namespace framework { +class OpDesc; +class Scope; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle + namespace paddle { namespace operators { class FeedOp : public framework::OperatorBase { diff --git a/paddle/fluid/operators/controlflow/get_places_op.cc b/paddle/fluid/operators/controlflow/get_places_op.cc index 4ab9f9fc863..e8829e1e1fa 100644 --- a/paddle/fluid/operators/controlflow/get_places_op.cc +++ b/paddle/fluid/operators/controlflow/get_places_op.cc @@ -12,9 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include // NOLINT #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/place.h" + +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +class Scope; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle #ifdef PADDLE_WITH_CUDA #include "paddle/fluid/platform/gpu_info.h" #endif diff --git a/paddle/fluid/operators/controlflow/op_variant.h b/paddle/fluid/operators/controlflow/op_variant.h index 9af993f1006..28c27437de1 100644 --- a/paddle/fluid/operators/controlflow/op_variant.h +++ b/paddle/fluid/operators/controlflow/op_variant.h @@ -20,6 +20,13 @@ #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/platform/variant.h" +namespace paddle { +namespace framework { +class OpDesc; +class OperatorBase; +} // namespace framework +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/controlflow/recurrent_op_helper.cc b/paddle/fluid/operators/controlflow/recurrent_op_helper.cc index f2973add84e..c96b7c6a08c 100644 --- a/paddle/fluid/operators/controlflow/recurrent_op_helper.cc +++ b/paddle/fluid/operators/controlflow/recurrent_op_helper.cc @@ -13,14 +13,15 @@ // limitations under the License. 
#include "paddle/fluid/operators/controlflow/recurrent_op_helper.h" - #include #include -#include -#include -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/operators/recurrent_op.h" +namespace paddle { +namespace framework { +class BlockDesc; +class ProgramDesc; +} // namespace framework +} // namespace paddle namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/controlflow/recurrent_op_helper.h b/paddle/fluid/operators/controlflow/recurrent_op_helper.h index aacca0762ca..3d9404e57aa 100644 --- a/paddle/fluid/operators/controlflow/recurrent_op_helper.h +++ b/paddle/fluid/operators/controlflow/recurrent_op_helper.h @@ -26,6 +26,12 @@ #include "paddle/fluid/platform/variant.h" #include "paddle/fluid/string/string_helper.h" +namespace paddle { +namespace framework { +class ProgramDesc; +} // namespace framework +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc b/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc index 9f7702a5d6b..c4451c3b583 100644 --- a/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc +++ b/paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc @@ -12,7 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/array_operator.h" -#include "paddle/fluid/operators/math/math_function.h" + +namespace paddle { +namespace framework { +class OpDesc; +class Scope; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc index e4a1397f5c4..b85e740ada9 100644 --- a/paddle/fluid/operators/controlflow/while_op.cc +++ b/paddle/fluid/operators/controlflow/while_op.cc @@ -12,14 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include #include "paddle/fluid/framework/executor.h" -#include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/operators/controlflow/while_op_helper.h" +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +class VarDesc; +} // namespace framework +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/controlflow/while_op_helper.h b/paddle/fluid/operators/controlflow/while_op_helper.h index 4f9d93c91f6..d2e9953e647 100644 --- a/paddle/fluid/operators/controlflow/while_op_helper.h +++ b/paddle/fluid/operators/controlflow/while_op_helper.h @@ -17,9 +17,17 @@ #include #include #include + #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/variant.h" +namespace paddle { +namespace framework { +class LoDTensor; +class ProgramDesc; +} // namespace framework +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/cudnn_lstm_op.cu.cc b/paddle/fluid/operators/cudnn_lstm_op.cu.cc index 6457d9295dc..6ac75b78d70 100644 --- a/paddle/fluid/operators/cudnn_lstm_op.cu.cc +++ 
b/paddle/fluid/operators/cudnn_lstm_op.cu.cc @@ -16,8 +16,13 @@ limitations under the License. */ #include "paddle/fluid/operators/cudnn_lstm_cache.h" #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/utils.h" -#include "paddle/fluid/platform/cudnn_desc.h" -#include "paddle/fluid/platform/cudnn_helper.h" + +namespace paddle { +namespace platform { +class CUDADeviceContext; +struct CUDAPlace; +} // namespace platform +} // namespace paddle namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/delete_var_op.cc b/paddle/fluid/operators/delete_var_op.cc index ec60569be20..aa5649e4e9c 100644 --- a/paddle/fluid/operators/delete_var_op.cc +++ b/paddle/fluid/operators/delete_var_op.cc @@ -12,6 +12,19 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +class Scope; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle + namespace paddle { namespace operators { class DeleteVarOp : public framework::OperatorBase { diff --git a/paddle/fluid/operators/dequantize_abs_max_op.cc b/paddle/fluid/operators/dequantize_abs_max_op.cc index 0d4d68d9f62..c8bca25b6b0 100644 --- a/paddle/fluid/operators/dequantize_abs_max_op.cc +++ b/paddle/fluid/operators/dequantize_abs_max_op.cc @@ -13,8 +13,23 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/dequantize_abs_max_op.h" + #include -#include + +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +struct CPUPlace; +} // namespace platform +} // namespace paddle namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/dequantize_abs_max_op.h b/paddle/fluid/operators/dequantize_abs_max_op.h index 796ca93b000..0d9d20fc120 100644 --- a/paddle/fluid/operators/dequantize_abs_max_op.h +++ b/paddle/fluid/operators/dequantize_abs_max_op.h @@ -15,10 +15,17 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" +namespace paddle { +namespace framework { +class Tensor; +} // namespace framework +} // namespace paddle + namespace paddle { namespace operators { @@ -35,6 +42,7 @@ class DequantizeMaxAbsKernel : public framework::OpKernel { virtual void Compute(const framework::ExecutionContext& ctx) const { auto* in = ctx.Input("X"); auto* scale = ctx.Input("Scale"); + auto* out = ctx.Output("Out"); float max_range = ctx.Attr("max_range"); diff --git a/paddle/fluid/operators/dequantize_log_op.cc b/paddle/fluid/operators/dequantize_log_op.cc index 2ecd54f7edd..c12dd9e6d21 100644 --- a/paddle/fluid/operators/dequantize_log_op.cc +++ b/paddle/fluid/operators/dequantize_log_op.cc @@ -13,9 +13,23 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/dequantize_log_op.h" -#include + #include -#include + +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +struct CPUPlace; +} // namespace platform +} // namespace paddle namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/dequantize_log_op.h b/paddle/fluid/operators/dequantize_log_op.h index f6590ecf61f..67ce9cc84d3 100644 --- a/paddle/fluid/operators/dequantize_log_op.h +++ b/paddle/fluid/operators/dequantize_log_op.h @@ -15,9 +15,16 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/op_registry.h" +namespace paddle { +namespace framework { +class Tensor; +} // namespace framework +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/distributed/async_sparse_param_update_recorder.h b/paddle/fluid/operators/distributed/async_sparse_param_update_recorder.h index eadd842c7f6..cef6590ae21 100644 --- a/paddle/fluid/operators/distributed/async_sparse_param_update_recorder.h +++ b/paddle/fluid/operators/distributed/async_sparse_param_update_recorder.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include // NOLINT #include @@ -23,8 +24,6 @@ #include #include -#include - #include "paddle/fluid/platform/enforce.h" namespace paddle { diff --git a/paddle/fluid/operators/distributed/async_sparse_param_update_recorder_test.cc b/paddle/fluid/operators/distributed/async_sparse_param_update_recorder_test.cc index 17d8973303b..2d78559625c 100644 --- a/paddle/fluid/operators/distributed/async_sparse_param_update_recorder_test.cc +++ b/paddle/fluid/operators/distributed/async_sparse_param_update_recorder_test.cc @@ -13,9 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/operators/distributed/async_sparse_param_update_recorder.h" - #include - #include "gtest/gtest.h" namespace paddle { diff --git a/paddle/fluid/operators/distributed/collective_client.cc b/paddle/fluid/operators/distributed/collective_client.cc index 6d3f5343111..fcd3e6abead 100644 --- a/paddle/fluid/operators/distributed/collective_client.cc +++ b/paddle/fluid/operators/distributed/collective_client.cc @@ -12,11 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include // NOLINT -#include -#include "gflags/gflags.h" - #include "paddle/fluid/operators/distributed/collective_client.h" +#include +#include "gflags/gflags.h" DECLARE_int32(rpc_deadline); diff --git a/paddle/fluid/operators/distributed/collective_client.h b/paddle/fluid/operators/distributed/collective_client.h index 6a3a450a1fd..e7d8bb8df98 100644 --- a/paddle/fluid/operators/distributed/collective_client.h +++ b/paddle/fluid/operators/distributed/collective_client.h @@ -15,16 +15,27 @@ #pragma once #include // NOLINT +#include #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/operators/distributed/distributed.h" #include "paddle/fluid/operators/distributed/request_handler.h" +namespace paddle { +namespace framework { +class Scope; +class SelectedRows; +} // namespace framework +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle + DECLARE_int32(rpc_deadline); namespace paddle { diff --git a/paddle/fluid/operators/distributed/collective_server.cc b/paddle/fluid/operators/distributed/collective_server.cc index c95652400c2..cdd37742d2d 100644 --- a/paddle/fluid/operators/distributed/collective_server.cc +++ b/paddle/fluid/operators/distributed/collective_server.cc @@ -12,14 +12,8 @@ WITHOUT WARRANTIES 
OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include // for removing the port file -#include -#include -#include -#include // NOLINT -#include - #include "paddle/fluid/operators/distributed/collective_server.h" +#include DEFINE_int32(collective_get_thread_num, 5, "number of threads for rpc get"); diff --git a/paddle/fluid/operators/distributed/collective_server.h b/paddle/fluid/operators/distributed/collective_server.h index 03c688a78e1..1015007ba0c 100644 --- a/paddle/fluid/operators/distributed/collective_server.h +++ b/paddle/fluid/operators/distributed/collective_server.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include +#include #include #include #include // NOLINT @@ -22,12 +23,17 @@ limitations under the License. */ #include #include "gflags/gflags.h" - #include "paddle/fluid/operators/distributed/distributed.h" #include "paddle/fluid/operators/distributed/request_handler.h" #include "paddle/fluid/operators/distributed/request_handler_impl.h" #include "paddle/fluid/operators/distributed/rpc_server.h" +namespace paddle { +namespace framework { +class Variable; +} // namespace framework +} // namespace paddle + namespace paddle { namespace operators { namespace distributed { diff --git a/paddle/fluid/operators/distributed/collective_server_test.cc b/paddle/fluid/operators/distributed/collective_server_test.cc index be8c7a7dd40..92b2eb4b51e 100644 --- a/paddle/fluid/operators/distributed/collective_server_test.cc +++ b/paddle/fluid/operators/distributed/collective_server_test.cc @@ -13,22 +13,18 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include -#include #include #include -#include // NOLINT #include "gtest/gtest.h" -#include "paddle/fluid/framework/block_desc.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/tensor_util.h" - #include "paddle/fluid/operators/distributed/collective_client.h" #include "paddle/fluid/operators/distributed/collective_server.h" -#include "paddle/fluid/operators/distributed/distributed.h" -#include "paddle/fluid/operators/distributed/request_handler_impl.h" -#include "paddle/fluid/operators/math/math_function.h" + +namespace paddle { +namespace framework { +class Variable; +} // namespace framework +} // namespace paddle namespace framework = paddle::framework; namespace platform = paddle::platform; diff --git a/paddle/fluid/operators/distributed/communicator.h b/paddle/fluid/operators/distributed/communicator.h index 4a9a9eb1701..07fd4ed4960 100644 --- a/paddle/fluid/operators/distributed/communicator.h +++ b/paddle/fluid/operators/distributed/communicator.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include +#include #include #include #include @@ -26,8 +27,8 @@ limitations under the License. */ #include #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/framework/variable_helper.h" diff --git a/paddle/fluid/operators/distributed/communicator_test.cc b/paddle/fluid/operators/distributed/communicator_test.cc index b9a6afa4cc3..38b7c8b0031 100644 --- a/paddle/fluid/operators/distributed/communicator_test.cc +++ b/paddle/fluid/operators/distributed/communicator_test.cc @@ -12,11 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include #include -#include -#include -#include #include "paddle/fluid/operators/distributed/communicator.h" diff --git a/paddle/fluid/operators/distributed/grpc/grpc_bytebuffer_stream.cc b/paddle/fluid/operators/distributed/grpc/grpc_bytebuffer_stream.cc index c2cb0d7f04e..7d6756b4136 100644 --- a/paddle/fluid/operators/distributed/grpc/grpc_bytebuffer_stream.cc +++ b/paddle/fluid/operators/distributed/grpc/grpc_bytebuffer_stream.cc @@ -19,6 +19,10 @@ limitations under the License. */ #include "paddle/fluid/operators/distributed/grpc/grpc_bytebuffer_stream.h" +namespace grpc { +class ByteBuffer; +} // namespace grpc + namespace paddle { namespace operators { namespace distributed { diff --git a/paddle/fluid/operators/distributed/grpc/grpc_bytebuffer_stream.h b/paddle/fluid/operators/distributed/grpc/grpc_bytebuffer_stream.h index e9074574cdd..486870de7a5 100644 --- a/paddle/fluid/operators/distributed/grpc/grpc_bytebuffer_stream.h +++ b/paddle/fluid/operators/distributed/grpc/grpc_bytebuffer_stream.h @@ -26,8 +26,12 @@ limitations under the License. */ #include "grpc++/grpc++.h" #include "paddle/fluid/operators/distributed/variable_response.h" +struct grpc_byte_buffer; + namespace grpc { // A ZeroCopyInputStream that reads from grpc_byte_buffer +class ByteBuffer; + class GrpcBufferReader final : public ::google::protobuf::io::ZeroCopyInputStream { typedef void (CoreCodegenInterface::*OldReaderInitAPI)( diff --git a/paddle/fluid/operators/distributed/grpc/grpc_client.h b/paddle/fluid/operators/distributed/grpc/grpc_client.h index 6b6249540c6..22ca74a67e7 100644 --- a/paddle/fluid/operators/distributed/grpc/grpc_client.h +++ b/paddle/fluid/operators/distributed/grpc/grpc_client.h @@ -16,7 +16,6 @@ limitations under the License. */ #include #include - #include // NOLINT #include // NOLINT #include @@ -47,6 +46,18 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/distributed/sendrecvop_utils.h" #include "paddle/fluid/platform/macros.h" // for DISABLE_COPY_AND_ASSIGN +namespace grpc { +class Channel; +} // namespace grpc +namespace paddle { +namespace framework { +class Scope; +} // namespace framework +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { namespace distributed { diff --git a/paddle/fluid/operators/distributed/grpc/grpc_serde.cc b/paddle/fluid/operators/distributed/grpc/grpc_serde.cc index eddd89cf20c..0372846ce0d 100644 --- a/paddle/fluid/operators/distributed/grpc/grpc_serde.cc +++ b/paddle/fluid/operators/distributed/grpc/grpc_serde.cc @@ -17,19 +17,26 @@ limitations under the License. */ #endif #include #include -#include // NOLINT - -#include "google/protobuf/io/coded_stream.h" -#include "google/protobuf/io/zero_copy_stream.h" -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/operators/distributed/grpc/grpc_bytebuffer_stream.h" +#include "grpcpp/impl/codegen/byte_buffer.h" +#include "grpcpp/impl/codegen/slice.h" #include "paddle/fluid/operators/distributed/grpc/grpc_serde.h" #include "paddle/fluid/operators/distributed/grpc/grpc_variable_response.h" #include "paddle/fluid/operators/distributed/proto_encoder_helper.h" +#include "paddle/fluid/operators/distributed/send_recv.pb.h" #include "paddle/fluid/operators/distributed/sendrecvop_utils.h" -#include "paddle/fluid/platform/port.h" +#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/profiler.h" +namespace paddle { +namespace framework { +class Scope; +class Variable; +} // namespace framework +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { namespace distributed { diff --git a/paddle/fluid/operators/distributed/grpc/grpc_serde.h b/paddle/fluid/operators/distributed/grpc/grpc_serde.h index 
30e6907656e..932f3e2f069 100644 --- a/paddle/fluid/operators/distributed/grpc/grpc_serde.h +++ b/paddle/fluid/operators/distributed/grpc/grpc_serde.h @@ -24,10 +24,22 @@ limitations under the License. */ #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/var_type.h" +#include "paddle/fluid/operators/distributed/distributed_pb.h" #include "paddle/fluid/operators/distributed/sendrecvop_utils.h" #include "paddle/fluid/platform/port.h" -#include "paddle/fluid/operators/distributed/distributed_pb.h" +namespace grpc { +class ByteBuffer; +} // namespace grpc +namespace paddle { +namespace framework { +class Scope; +class Variable; +} // namespace framework +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/distributed/grpc/grpc_server.cc b/paddle/fluid/operators/distributed/grpc/grpc_server.cc index 5c0232a50a9..47e114ff4b2 100644 --- a/paddle/fluid/operators/distributed/grpc/grpc_server.cc +++ b/paddle/fluid/operators/distributed/grpc/grpc_server.cc @@ -20,6 +20,20 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/distributed/grpc/grpc_serde.h" #include "paddle/fluid/operators/distributed/grpc/grpc_server.h" +namespace grpc { +class ChannelArguments; +} // namespace grpc +namespace paddle { +namespace framework { +class Variable; +} // namespace framework +namespace operators { +namespace distributed { +class GRPCVariableResponse; +} // namespace distributed +} // namespace operators +} // namespace paddle + using ::grpc::ServerAsyncResponseWriter; DECLARE_bool(rpc_disable_reuse_port); diff --git a/paddle/fluid/operators/distributed/grpc/grpc_server.h b/paddle/fluid/operators/distributed/grpc/grpc_server.h index ee6950205b3..3d68b7e8ceb 100644 --- a/paddle/fluid/operators/distributed/grpc/grpc_server.h +++ b/paddle/fluid/operators/distributed/grpc/grpc_server.h @@ -37,6 +37,10 @@ limitations under the License. */ #include "paddle/fluid/operators/distributed/sendrecvop_utils.h" #include "paddle/fluid/platform/profiler.h" +namespace grpc { +class ServerCompletionQueue; +} // namespace grpc + namespace paddle { namespace operators { namespace distributed { diff --git a/paddle/fluid/operators/distributed/grpc/grpc_variable_response.cc b/paddle/fluid/operators/distributed/grpc/grpc_variable_response.cc index e46d2fbe01c..7d7723f1945 100644 --- a/paddle/fluid/operators/distributed/grpc/grpc_variable_response.cc +++ b/paddle/fluid/operators/distributed/grpc/grpc_variable_response.cc @@ -12,16 +12,29 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include #include #include #include -#ifdef PADDLE_WITH_NCCL -#include -#endif +#include "google/protobuf/io/coded_stream.h" +#include "paddle/fluid/operators/distributed/grpc/grpc_bytebuffer_stream.h" #include "paddle/fluid/operators/distributed/grpc/grpc_variable_response.h" +#include "paddle/fluid/operators/distributed/send_recv.pb.h" +#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/profiler.h" +namespace google { +namespace protobuf { +namespace io { +class ZeroCopyInputStream; +} // namespace io +} // namespace protobuf +} // namespace google +namespace grpc { +class ByteBuffer; +} // namespace grpc + namespace paddle { namespace operators { namespace distributed { diff --git a/paddle/fluid/operators/distributed/grpc/grpc_variable_response.h b/paddle/fluid/operators/distributed/grpc/grpc_variable_response.h index 3ca1d89f750..4d12b4a4bac 100644 --- a/paddle/fluid/operators/distributed/grpc/grpc_variable_response.h +++ b/paddle/fluid/operators/distributed/grpc/grpc_variable_response.h @@ -16,19 +16,30 @@ #include +#include "google/protobuf/io/coded_stream.h" +#include "google/protobuf/io/zero_copy_stream.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/selected_rows.h" -#include "paddle/fluid/framework/var_type.h" - -#include "google/protobuf/io/coded_stream.h" -#include "google/protobuf/io/zero_copy_stream.h" #include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/operators/distributed/distributed_pb.h" #include "paddle/fluid/operators/distributed/grpc/grpc_bytebuffer_stream.h" #include "paddle/fluid/operators/distributed/variable_response.h" +namespace grpc { +class ByteBuffer; +} // namespace grpc +namespace paddle { +namespace framework { +class Scope; +} // namespace framework +namespace platform { +class DeviceContext; +} // namespace platform +} // 
namespace paddle + namespace paddle { namespace operators { namespace distributed { diff --git a/paddle/fluid/operators/distributed/heart_beat_monitor.cc b/paddle/fluid/operators/distributed/heart_beat_monitor.cc index 6736ea4336b..84ba9793c4e 100644 --- a/paddle/fluid/operators/distributed/heart_beat_monitor.cc +++ b/paddle/fluid/operators/distributed/heart_beat_monitor.cc @@ -13,7 +13,7 @@ // limitations under the License. #include "paddle/fluid/operators/distributed/heart_beat_monitor.h" -#include // NOLINT + #include namespace paddle { diff --git a/paddle/fluid/operators/distributed/heart_beat_monitor.h b/paddle/fluid/operators/distributed/heart_beat_monitor.h index 211e40757fc..cfef492de0e 100644 --- a/paddle/fluid/operators/distributed/heart_beat_monitor.h +++ b/paddle/fluid/operators/distributed/heart_beat_monitor.h @@ -14,21 +14,18 @@ #pragma once +#include #include - #include #include // NOLINT #include #include +#include // NOLINT #include #include #include #include -#include // NOLINT - -#include - #include "paddle/fluid/platform/enforce.h" namespace paddle { diff --git a/paddle/fluid/operators/distributed/heart_beat_monitor_test.cc b/paddle/fluid/operators/distributed/heart_beat_monitor_test.cc index 699c03f6f28..7c2c0fbff11 100644 --- a/paddle/fluid/operators/distributed/heart_beat_monitor_test.cc +++ b/paddle/fluid/operators/distributed/heart_beat_monitor_test.cc @@ -14,9 +14,6 @@ #include "paddle/fluid/operators/distributed/heart_beat_monitor.h" -#include -#include // NOLINT - #include "gtest/gtest.h" namespace paddle { diff --git a/paddle/fluid/operators/distributed/large_scale_kv.h b/paddle/fluid/operators/distributed/large_scale_kv.h index 9e39e68cba7..b4388c0002a 100644 --- a/paddle/fluid/operators/distributed/large_scale_kv.h +++ b/paddle/fluid/operators/distributed/large_scale_kv.h @@ -16,7 +16,6 @@ #include #include - #include #include // NOLINT #include diff --git a/paddle/fluid/operators/distributed/parameter_prefetch.cc 
b/paddle/fluid/operators/distributed/parameter_prefetch.cc index a9378d61c3c..6b33c1f5fcd 100644 --- a/paddle/fluid/operators/distributed/parameter_prefetch.cc +++ b/paddle/fluid/operators/distributed/parameter_prefetch.cc @@ -12,30 +12,28 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include "paddle/fluid/operators/distributed/parameter_prefetch.h" #include #include -#include #include #include -#include - -#include "paddle/fluid/operators/distributed/parameter_prefetch.h" - #include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/selected_rows.h" -#include "paddle/fluid/framework/tensor.h" - #include "paddle/fluid/operators/distributed/distributed.h" -#include "paddle/fluid/operators/distributed/rpc_client.h" -#include "paddle/fluid/operators/distributed/variable_response.h" -#include "paddle/fluid/operators/distributed_ops/send_recv_util.h" + +namespace paddle { +namespace framework { +class ExecutionContext; +class Scope; +} // namespace framework +} // namespace paddle namespace paddle { namespace operators { namespace distributed { +class RPCClient; + using LoDTensor = framework::LoDTensor; using LoDTensor = framework::LoDTensor; using SelectedRows = framework::SelectedRows; diff --git a/paddle/fluid/operators/distributed/parameter_prefetch.h b/paddle/fluid/operators/distributed/parameter_prefetch.h index 8605bcdcd86..6fd3a998813 100644 --- a/paddle/fluid/operators/distributed/parameter_prefetch.h +++ b/paddle/fluid/operators/distributed/parameter_prefetch.h @@ -20,6 +20,13 @@ #include "paddle/fluid/framework/operator.h" +namespace paddle { +namespace framework { +class ExecutionContext; +class Scope; +} // namespace framework +} // namespace paddle + namespace paddle { namespace operators { namespace distributed { diff --git a/paddle/fluid/operators/distributed/parameter_recv.cc 
b/paddle/fluid/operators/distributed/parameter_recv.cc index 3b8479c91b0..a91df5b3c47 100644 --- a/paddle/fluid/operators/distributed/parameter_recv.cc +++ b/paddle/fluid/operators/distributed/parameter_recv.cc @@ -12,29 +12,28 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include -#include -#include -#include - -#include "paddle/fluid/operators/distributed/parameter_recv.h" +#include "glog/logging.h" +#include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/selected_rows.h" -#include "paddle/fluid/framework/tensor.h" - +#include "paddle/fluid/operators/distributed/communicator_common.h" #include "paddle/fluid/operators/distributed/distributed.h" -#include "paddle/fluid/operators/distributed/rpc_client.h" -#include "paddle/fluid/operators/distributed/variable_response.h" -#include "paddle/fluid/operators/distributed_ops/send_recv_util.h" -#include "paddle/fluid/operators/strided_memcpy.h" +#include "paddle/fluid/operators/distributed/parameter_recv.h" +#include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/place.h" namespace paddle { namespace operators { namespace distributed { +class RPCClient; + using LoDTensor = framework::LoDTensor; using LoDTensor = framework::LoDTensor; using SelectedRows = framework::SelectedRows; diff --git a/paddle/fluid/operators/distributed/parameter_send.cc b/paddle/fluid/operators/distributed/parameter_send.cc index 545b1f5e803..99af06bc7cc 100644 --- a/paddle/fluid/operators/distributed/parameter_send.cc +++ b/paddle/fluid/operators/distributed/parameter_send.cc @@ -14,26 +14,31 @@ #include "paddle/fluid/operators/distributed/parameter_send.h" #include -#include -#include #include -#include - +#include "glog/logging.h" +#include "paddle/fluid/framework/ddim.h" #include 
"paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/selected_rows.h" -#include "paddle/fluid/framework/tensor.h" - +#include "paddle/fluid/operators/distributed/communicator_common.h" #include "paddle/fluid/operators/distributed/distributed.h" -#include "paddle/fluid/operators/distributed/rpc_client.h" -#include "paddle/fluid/operators/distributed/variable_response.h" -#include "paddle/fluid/operators/distributed_ops/send_recv_util.h" -#include "paddle/fluid/string/printf.h" +#include "paddle/fluid/operators/distributed/request_handler.h" +#include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/place.h" + +namespace paddle { +namespace framework { +class Scope; +class Tensor; +} // namespace framework +} // namespace paddle namespace paddle { namespace operators { namespace distributed { +class RPCClient; + using LoDTensor = framework::LoDTensor; using LoDTensor = framework::LoDTensor; using SelectedRows = framework::SelectedRows; diff --git a/paddle/fluid/operators/distributed/request_handler_impl.h b/paddle/fluid/operators/distributed/request_handler_impl.h index 42621724e68..6d239673f91 100644 --- a/paddle/fluid/operators/distributed/request_handler_impl.h +++ b/paddle/fluid/operators/distributed/request_handler_impl.h @@ -15,7 +15,6 @@ #pragma once #include - #include #include #include @@ -33,6 +32,13 @@ #include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/operators/distributed/request_handler.h" +namespace paddle { +namespace framework { +class Scope; +class Variable; +} // namespace framework +} // namespace paddle + namespace paddle { namespace operators { namespace distributed { diff --git a/paddle/fluid/operators/distributed/rpc_client.h b/paddle/fluid/operators/distributed/rpc_client.h index 69a5e327431..6a6a795a46b 100644 --- a/paddle/fluid/operators/distributed/rpc_client.h +++ 
b/paddle/fluid/operators/distributed/rpc_client.h @@ -17,13 +17,22 @@ #include // NOLINT #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/operators/distributed/request_handler.h" +namespace paddle { +namespace framework { +class Scope; +} // namespace framework +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle + DECLARE_int32(rpc_deadline); DECLARE_int32(rpc_retry_times); diff --git a/paddle/fluid/operators/distributed/rpc_server.cc b/paddle/fluid/operators/distributed/rpc_server.cc index 691c2c1048f..52b4456f7b1 100644 --- a/paddle/fluid/operators/distributed/rpc_server.cc +++ b/paddle/fluid/operators/distributed/rpc_server.cc @@ -15,15 +15,23 @@ #include "paddle/fluid/operators/distributed/rpc_server.h" #include -#include -#include #include -#include "paddle/fluid/platform/profiler.h" + +namespace paddle { +namespace framework { +class Scope; +} // namespace framework +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle namespace paddle { namespace operators { namespace distributed { +class RequestHandler; + void RPCServer::ShutDown() { VLOG(3) << "RPCServer ShutDown "; ShutDownImpl(); diff --git a/paddle/fluid/operators/distributed/rpc_server.h b/paddle/fluid/operators/distributed/rpc_server.h index 8c7b7f1d7ee..f83144f6268 100644 --- a/paddle/fluid/operators/distributed/rpc_server.h +++ b/paddle/fluid/operators/distributed/rpc_server.h @@ -18,6 +18,7 @@ #include #include #include // NOLINT +#include #include #include @@ -25,10 +26,21 @@ #include "paddle/fluid/operators/distributed/request_handler.h" #include "paddle/fluid/platform/device_context.h" +namespace paddle { +namespace framework { +class Scope; +} // namespace framework +namespace platform { +class DeviceContext; +} // namespace platform +} 
// namespace paddle + namespace paddle { namespace operators { namespace distributed { +class RequestHandler; + struct MonomerHandle { std::string var_name_; std::string rpc_name_; diff --git a/paddle/fluid/operators/distributed/sendrecvop_utils.cc b/paddle/fluid/operators/distributed/sendrecvop_utils.cc index b52fb93e5bf..2e9d958ebfb 100644 --- a/paddle/fluid/operators/distributed/sendrecvop_utils.cc +++ b/paddle/fluid/operators/distributed/sendrecvop_utils.cc @@ -11,17 +11,20 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ - -#ifdef PADDLE_WITH_NCCL -#include -#endif #include -#include // NOLINT -#include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/operators/distributed/sendrecvop_utils.h" -#include "paddle/fluid/operators/distributed/variable_response.h" -#include "paddle/fluid/platform/port.h" + +namespace paddle { +namespace framework { +class Variable; +} // namespace framework +namespace memory { +namespace allocation { +class Allocation; +} // namespace allocation +} // namespace memory +} // namespace paddle DEFINE_bool(rpc_disable_reuse_port, false, "Disable SO_REUSEPORT or not."); DEFINE_int32(rpc_retry_bind_port, 3, diff --git a/paddle/fluid/operators/distributed/sendrecvop_utils.h b/paddle/fluid/operators/distributed/sendrecvop_utils.h index 5457101a5c9..8a382baa5be 100644 --- a/paddle/fluid/operators/distributed/sendrecvop_utils.h +++ b/paddle/fluid/operators/distributed/sendrecvop_utils.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include +#include #include #include #include @@ -27,6 +28,21 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/distributed/distributed_pb.h" #include "paddle/fluid/platform/port.h" +namespace paddle { +namespace framework { +class Tensor; +class Variable; +} // namespace framework +namespace memory { +namespace allocation { +class Allocation; +} // namespace allocation +} // namespace memory +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { namespace distributed { diff --git a/paddle/fluid/operators/distributed/varhandle_test.cc b/paddle/fluid/operators/distributed/varhandle_test.cc index a0fcaf88647..7c52ef74b4c 100644 --- a/paddle/fluid/operators/distributed/varhandle_test.cc +++ b/paddle/fluid/operators/distributed/varhandle_test.cc @@ -12,11 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include -#include -#include // NOLINT - -#include "google/protobuf/text_format.h" #include "gtest/gtest.h" #include "paddle/fluid/operators/distributed/request_handler.h" diff --git a/paddle/fluid/operators/distributed/variable_response.h b/paddle/fluid/operators/distributed/variable_response.h index d979cd8a881..be67a2396f7 100644 --- a/paddle/fluid/operators/distributed/variable_response.h +++ b/paddle/fluid/operators/distributed/variable_response.h @@ -16,17 +16,33 @@ #include +#include "google/protobuf/io/coded_stream.h" +#include "google/protobuf/io/zero_copy_stream.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/selected_rows.h" -#include "paddle/fluid/framework/var_type.h" - -#include "google/protobuf/io/coded_stream.h" -#include "google/protobuf/io/zero_copy_stream.h" #include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/framework/var_type.h" #include 
"paddle/fluid/operators/distributed/distributed_pb.h" +namespace google { +namespace protobuf { +namespace io { +class CodedInputStream; +class ZeroCopyInputStream; +} // namespace io +} // namespace protobuf +} // namespace google +namespace paddle { +namespace framework { +class Variable; +} // namespace framework +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle + DECLARE_string(rpc_server_profile_path); namespace paddle { diff --git a/paddle/fluid/operators/distributed_ops/checkpoint_notify_op.cc b/paddle/fluid/operators/distributed_ops/checkpoint_notify_op.cc index 2ed2acb96dc..abc8d912840 100644 --- a/paddle/fluid/operators/distributed_ops/checkpoint_notify_op.cc +++ b/paddle/fluid/operators/distributed_ops/checkpoint_notify_op.cc @@ -9,15 +9,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include // NOLINT -#include - -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/distributed/distributed.h" -#include "paddle/fluid/operators/distributed_ops/send_recv_util.h" -#include "paddle/fluid/string/printf.h" + +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +class Scope; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/distributed_ops/fetch_barrier_op.cc b/paddle/fluid/operators/distributed_ops/fetch_barrier_op.cc index c9f9daf3b3c..755cbf017d9 100644 --- a/paddle/fluid/operators/distributed_ops/fetch_barrier_op.cc +++ b/paddle/fluid/operators/distributed_ops/fetch_barrier_op.cc @@ -12,19 +12,29 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 
or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include // NOLINT -#include - -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/framework/framework.pb.h" -#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/distributed/distributed.h" -#include "paddle/fluid/platform/profiler.h" + +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +class Scope; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle namespace paddle { namespace operators { +namespace distributed { +class RPCClient; +} // namespace distributed + class FetchBarrierOp : public framework::OperatorBase { public: FetchBarrierOp(const std::string& type, diff --git a/paddle/fluid/operators/distributed_ops/fl_listen_and_serv_op.h b/paddle/fluid/operators/distributed_ops/fl_listen_and_serv_op.h index 1199a63d16a..25ad16e3fce 100644 --- a/paddle/fluid/operators/distributed_ops/fl_listen_and_serv_op.h +++ b/paddle/fluid/operators/distributed_ops/fl_listen_and_serv_op.h @@ -31,9 +31,25 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/distributed/rpc_server.h" #include "paddle/fluid/platform/device_context.h" +namespace paddle { +namespace framework { +class Executor; +class ProgramDesc; +class Scope; +} // namespace framework +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { +namespace distributed { +class RPCServer; +class RequestHandler; +} // namespace distributed + constexpr char kOptimizeBlocks[] = "optimize_blocks"; void FlRunServer(std::shared_ptr service); diff --git a/paddle/fluid/operators/distributed_ops/gen_nccl_id_op.cc b/paddle/fluid/operators/distributed_ops/gen_nccl_id_op.cc index e63f8824783..db8c2f3f2d8 100644 --- a/paddle/fluid/operators/distributed_ops/gen_nccl_id_op.cc +++ b/paddle/fluid/operators/distributed_ops/gen_nccl_id_op.cc @@ -12,18 +12,24 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include -#include #include #include +#include "glog/logging.h" #include "paddle/fluid/framework/executor.h" -#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/threadpool.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/framework/var_type_traits.h" #include "paddle/fluid/operators/distributed/distributed.h" +#include "paddle/fluid/operators/distributed/request_handler.h" #include "paddle/fluid/operators/distributed/request_handler_impl.h" -#include "paddle/fluid/platform/nccl_helper.h" +#include "paddle/fluid/operators/distributed/rpc_client.h" +#include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/place.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/distributed_ops/listen_and_serv_op.h b/paddle/fluid/operators/distributed_ops/listen_and_serv_op.h index b41e4e87722..bacfd32cc73 100644 --- a/paddle/fluid/operators/distributed_ops/listen_and_serv_op.h +++ b/paddle/fluid/operators/distributed_ops/listen_and_serv_op.h @@ -31,9 +31,25 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/distributed/rpc_server.h" #include "paddle/fluid/platform/device_context.h" +namespace paddle { +namespace framework { +class Executor; +class ProgramDesc; +class Scope; +} // namespace framework +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { +namespace distributed { +class RPCServer; +class RequestHandler; +} // namespace distributed + constexpr char kOptimizeBlocks[] = "optimize_blocks"; constexpr char kPrefetchVarNameToBlockId[] = "prefetch_var_name_to_block_id"; constexpr char kCheckpointBlockId[] = "checkpint_block_id"; diff --git a/paddle/fluid/operators/distributed_ops/prefetch_op.cc b/paddle/fluid/operators/distributed_ops/prefetch_op.cc index 6037ab1523e..007dbbbfbf5 100644 --- a/paddle/fluid/operators/distributed_ops/prefetch_op.cc +++ b/paddle/fluid/operators/distributed_ops/prefetch_op.cc @@ -12,18 +12,29 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include // NOLINT -#include - -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/distributed/distributed.h" -#include "paddle/fluid/operators/distributed_ops/send_recv_util.h" + +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +class Scope; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle namespace paddle { namespace operators { +namespace distributed { +class RPCClient; +} // namespace distributed + class PrefetchOp : public framework::OperatorBase { public: PrefetchOp(const std::string& type, const framework::VariableNameMap& inputs, diff --git a/paddle/fluid/operators/distributed_ops/recv_op.cc b/paddle/fluid/operators/distributed_ops/recv_op.cc index 2547ba3acb1..9729d0dadd7 100644 --- a/paddle/fluid/operators/distributed_ops/recv_op.cc +++ b/paddle/fluid/operators/distributed_ops/recv_op.cc @@ -12,22 +12,30 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include // NOLINT -#include - -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/framework/framework.pb.h" -#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/distributed/communicator.h" -#include "paddle/fluid/operators/distributed/communicator_common.h" #include "paddle/fluid/operators/distributed/distributed.h" -#include "paddle/fluid/operators/distributed/parameter_recv.h" -#include "paddle/fluid/platform/profiler.h" + +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +class Scope; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle namespace paddle { namespace operators { +namespace distributed { +class RPCClient; +} // namespace distributed + class RecvOp : public framework::OperatorBase { public: RecvOp(const std::string &type, const framework::VariableNameMap &inputs, diff --git a/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.cc b/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.cc index befdf4e9388..4727b3bb249 100644 --- a/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.cc +++ b/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.cc @@ -13,8 +13,21 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.h" + #include +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.h b/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.h index 43dd9c3c98a..d8639627c3e 100644 --- a/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.h +++ b/paddle/fluid/operators/distributed_ops/ref_by_trainer_id_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/distributed_ops/send_barrier_op.cc b/paddle/fluid/operators/distributed_ops/send_barrier_op.cc index a8e9379d214..5aa2ba26aa4 100644 --- a/paddle/fluid/operators/distributed_ops/send_barrier_op.cc +++ b/paddle/fluid/operators/distributed_ops/send_barrier_op.cc @@ -12,20 +12,29 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include // NOLINT -#include - -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/framework/framework.pb.h" -#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/distributed/distributed.h" -#include "paddle/fluid/platform/profiler.h" +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +class Scope; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle namespace paddle { namespace operators { +namespace distributed { +class RPCClient; +} // namespace distributed + class SendBarrierOp : public framework::OperatorBase { public: SendBarrierOp(const std::string& type, diff --git a/paddle/fluid/operators/distributed_ops/send_op.cc b/paddle/fluid/operators/distributed_ops/send_op.cc index 53e3d70f960..a4192c18afa 100644 --- a/paddle/fluid/operators/distributed_ops/send_op.cc +++ b/paddle/fluid/operators/distributed_ops/send_op.cc @@ -12,23 +12,29 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include // NOLINT -#include - -#include "paddle/fluid/framework/blocking_queue.h" -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/distributed/communicator.h" -#include "paddle/fluid/operators/distributed/communicator_common.h" #include "paddle/fluid/operators/distributed/distributed.h" -#include "paddle/fluid/operators/distributed/parameter_send.h" -#include "paddle/fluid/operators/distributed_ops/send_recv_util.h" -#include "paddle/fluid/platform/profiler.h" + +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +class Scope; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle namespace paddle { namespace operators { +namespace distributed { +class RPCClient; +} // namespace distributed + class SendOp : public framework::OperatorBase { public: SendOp(const std::string& type, const framework::VariableNameMap& inputs, diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.cc b/paddle/fluid/operators/elementwise/elementwise_add_op.cc index 97624944ca1..68a98e7c6bc 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_add_op.cc @@ -20,6 +20,19 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/operators/elementwise/elementwise_op.h" +namespace paddle { +namespace framework { +class OpDesc; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +class CPUDeviceContext; +struct CPUPlace; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.h b/paddle/fluid/operators/elementwise/elementwise_add_op.h index 152f20273a1..c4efc4ab72d 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_add_op.h @@ -17,6 +17,7 @@ limitations under the License. */ #include "paddle/fluid/operators/elementwise/elementwise_op_function.cu.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" #include "paddle/fluid/operators/math/blas.h" + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc b/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc index 5a398fa50fe..ddd69203fd3 100644 --- a/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc @@ -13,9 +13,26 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/elementwise/elementwise_floordiv_op.h" + #include + #include "paddle/fluid/operators/elementwise/elementwise_op.h" +namespace paddle { +namespace framework { +class OpDesc; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +class CPUDeviceContext; +struct CPUPlace; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { class ElementwiseFloorDivOpMaker : public ElementwiseOpMaker { diff --git a/paddle/fluid/operators/elementwise/elementwise_max_op.cc b/paddle/fluid/operators/elementwise/elementwise_max_op.cc index 692bc015c5b..be6a6330547 100644 --- a/paddle/fluid/operators/elementwise/elementwise_max_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_max_op.cc @@ -13,10 +13,24 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/elementwise/elementwise_max_op.h" -#include + #include + #include "paddle/fluid/operators/elementwise/elementwise_op.h" +namespace paddle { +namespace framework { +class OpDesc; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +class CPUDeviceContext; +struct CPUPlace; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/elementwise/elementwise_min_op.cc b/paddle/fluid/operators/elementwise/elementwise_min_op.cc index 1b2364a5a53..bd40763e05a 100644 --- a/paddle/fluid/operators/elementwise/elementwise_min_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_min_op.cc @@ -13,10 +13,24 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/elementwise/elementwise_min_op.h" -#include + #include + #include "paddle/fluid/operators/elementwise/elementwise_op.h" +namespace paddle { +namespace framework { +class OpDesc; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +class CPUDeviceContext; +struct CPUPlace; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/elementwise/elementwise_min_op.h b/paddle/fluid/operators/elementwise/elementwise_min_op.h index 1a49a601398..5a3e7f90f3c 100644 --- a/paddle/fluid/operators/elementwise/elementwise_min_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_min_op.h @@ -16,6 +16,7 @@ limitations under the License. */ #include "paddle/fluid/operators/elementwise/elementwise_op.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/elementwise/elementwise_mod_op.cc b/paddle/fluid/operators/elementwise/elementwise_mod_op.cc index 8c2e62bed19..d8ad0a353c9 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mod_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_mod_op.cc @@ -13,9 +13,26 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/elementwise/elementwise_mod_op.h" + #include + #include "paddle/fluid/operators/elementwise/elementwise_op.h" +namespace paddle { +namespace framework { +class OpDesc; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +class CPUDeviceContext; +struct CPUPlace; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { class ElementwiseModOpMaker : public ElementwiseOpMaker { diff --git a/paddle/fluid/operators/elementwise/elementwise_pow_op.cc b/paddle/fluid/operators/elementwise/elementwise_pow_op.cc index 1321eee8457..ea0e8e7c013 100644 --- a/paddle/fluid/operators/elementwise/elementwise_pow_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_pow_op.cc @@ -10,10 +10,24 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/elementwise/elementwise_pow_op.h" -#include + #include + #include "paddle/fluid/operators/elementwise/elementwise_op.h" +namespace paddle { +namespace framework { +class OpDesc; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +class CPUDeviceContext; +struct CPUPlace; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/elementwise/elementwise_pow_op.h b/paddle/fluid/operators/elementwise/elementwise_pow_op.h index a910c326196..535d838209d 100755 --- a/paddle/fluid/operators/elementwise/elementwise_pow_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_pow_op.h @@ -13,6 +13,7 @@ limitations under the License. 
*/ #include #include + #include "paddle/fluid/operators/elementwise/elementwise_op.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" diff --git a/paddle/fluid/operators/elementwise/elementwise_sub_op.cc b/paddle/fluid/operators/elementwise/elementwise_sub_op.cc index 9603b022d5d..90f4ebb99ec 100644 --- a/paddle/fluid/operators/elementwise/elementwise_sub_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_sub_op.cc @@ -13,10 +13,24 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/elementwise/elementwise_sub_op.h" -#include + #include + #include "paddle/fluid/operators/elementwise/elementwise_op.h" +namespace paddle { +namespace framework { +class OpDesc; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +class CPUDeviceContext; +struct CPUPlace; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc index caaaf2c931d..3dcf5bf6a32 100644 --- a/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc +++ b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc @@ -14,6 +14,16 @@ #include "paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h" +namespace paddle { +namespace framework { +class ExecutionContext; +} // namespace framework +namespace platform { +class CPUDeviceContext; +struct CPUPlace; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { template diff --git a/paddle/fluid/operators/elementwise/test_elementwise_add_grad_grad.cc b/paddle/fluid/operators/elementwise/test_elementwise_add_grad_grad.cc index be8b8d6c2f7..15c31a4cece 100644 --- 
a/paddle/fluid/operators/elementwise/test_elementwise_add_grad_grad.cc +++ b/paddle/fluid/operators/elementwise/test_elementwise_add_grad_grad.cc @@ -12,20 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include -#include -#include -#include -#include #include "gtest/gtest.h" -#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/operators/elementwise/test_elementwise_op_grad_grad.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" USE_OP(elementwise_add); diff --git a/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc b/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc index 6ec8f2c2355..cf9e9dbb04b 100644 --- a/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc +++ b/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc @@ -12,14 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include -#include #include + #include "gtest/gtest.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/enqueue_op.cc b/paddle/fluid/operators/enqueue_op.cc index 9b367a72fb5..a7920488f2e 100644 --- a/paddle/fluid/operators/enqueue_op.cc +++ b/paddle/fluid/operators/enqueue_op.cc @@ -13,13 +13,24 @@ // limitations under the License. 
#include -#include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/var_type.h" +#include "paddle/fluid/framework/var_type_traits.h" #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" +namespace paddle { +namespace framework { +class OpDesc; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle + using LoDTensor = paddle::framework::LoDTensor; using LoDTensorBlockingQueueHolder = paddle::operators::reader::LoDTensorBlockingQueueHolder; diff --git a/paddle/fluid/operators/eye_op.h b/paddle/fluid/operators/eye_op.h index 0eefe7d2163..d5ad27596d6 100644 --- a/paddle/fluid/operators/eye_op.h +++ b/paddle/fluid/operators/eye_op.h @@ -51,7 +51,7 @@ class EyeKernel : public framework::OpKernel { auto& dev_ctx = ctx.template device_context(); set_zero(dev_ctx, out_tensor, static_cast(0)); - int64_t num_eyes = std::min(num_rows, num_columns); + int64_t num_eyes = (std::min)(num_rows, num_columns); platform::ForRange for_range(dev_ctx, num_eyes); EyeFunctor functor(num_columns, out_data); for_range(functor); diff --git a/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cu.cc b/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cu.cc index 17cb4556d45..37a442a7815 100644 --- a/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cu.cc +++ b/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cu.cc @@ -13,10 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.h" -#include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/cudnn_helper.h" +namespace paddle { +namespace platform { +struct CUDAPlace; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/gather_test.cc b/paddle/fluid/operators/gather_test.cc index f6b156eb30d..ea1fa813928 100644 --- a/paddle/fluid/operators/gather_test.cc +++ b/paddle/fluid/operators/gather_test.cc @@ -13,10 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include -#include -#include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/operators/gather.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc b/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc index 93f9e108723..f0903bdfce9 100644 --- a/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc +++ b/paddle/fluid/operators/grid_sampler_cudnn_op.cu.cc @@ -15,6 +15,12 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/cudnn_helper.h" +namespace paddle { +namespace framework { +class Tensor; +} // namespace framework +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/gru_op.cu.cc b/paddle/fluid/operators/gru_op.cu.cc index ba918b3def2..bdc5debaea7 100644 --- a/paddle/fluid/operators/gru_op.cu.cc +++ b/paddle/fluid/operators/gru_op.cu.cc @@ -14,6 +14,13 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/gru_op.h" +namespace paddle { +namespace platform { +class CUDADeviceContext; +struct CUDAPlace; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/hash_op.cc b/paddle/fluid/operators/hash_op.cc index b6017a6eafc..47b480c11c2 100644 --- a/paddle/fluid/operators/hash_op.cc +++ b/paddle/fluid/operators/hash_op.cc @@ -13,8 +13,24 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/hash_op.h" + #include +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +struct CPUPlace; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/hash_op.h b/paddle/fluid/operators/hash_op.h index c2d53000491..b5b3f3de70c 100644 --- a/paddle/fluid/operators/hash_op.h +++ b/paddle/fluid/operators/hash_op.h @@ -18,6 +18,7 @@ extern "C" { #include } #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/increment_op.cc b/paddle/fluid/operators/increment_op.cc index b4f33dad927..e8edfb99f9f 100644 --- a/paddle/fluid/operators/increment_op.cc +++ b/paddle/fluid/operators/increment_op.cc @@ -13,9 +13,23 @@ // limitations under the License. 
#include "paddle/fluid/operators/increment_op.h" -#include + #include +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +class CPUDeviceContext; +struct CPUPlace; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/isfinite_op.cc b/paddle/fluid/operators/isfinite_op.cc index 9b92ce3e538..1ac1c26796c 100644 --- a/paddle/fluid/operators/isfinite_op.cc +++ b/paddle/fluid/operators/isfinite_op.cc @@ -13,8 +13,24 @@ // limitations under the License. #include "paddle/fluid/operators/isfinite_op.h" + #include -#include + +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +class CPUDeviceContext; +struct CPUPlace; +} // namespace platform +} // namespace paddle namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/isfinite_op.h b/paddle/fluid/operators/isfinite_op.h index 2fc0d58669b..a54134910d0 100644 --- a/paddle/fluid/operators/isfinite_op.h +++ b/paddle/fluid/operators/isfinite_op.h @@ -15,12 +15,19 @@ #pragma once #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/transform.h" +namespace paddle { +namespace framework { +class Tensor; +} // namespace framework +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/isfinite_v2_op.cc b/paddle/fluid/operators/isfinite_v2_op.cc index 72da43e3bc6..fcbb4c5bf6a 100644 --- a/paddle/fluid/operators/isfinite_v2_op.cc +++ b/paddle/fluid/operators/isfinite_v2_op.cc @@ -13,10 +13,31 @@ // 
limitations under the License. #include "paddle/fluid/operators/isfinite_v2_op.h" + #include -#include + #include "paddle/fluid/operators/common_infer_shape_functions.h" -#include "paddle/fluid/platform/float16.h" + +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace operators { +template +class OverflowKernel; +} // namespace operators +namespace platform { +class CPUDeviceContext; +struct CPUPlace; +struct float16; +} // namespace platform +} // namespace paddle namespace plat = paddle::platform; diff --git a/paddle/fluid/operators/isfinite_v2_op.h b/paddle/fluid/operators/isfinite_v2_op.h index 9f0aa63ce80..332c50d7551 100644 --- a/paddle/fluid/operators/isfinite_v2_op.h +++ b/paddle/fluid/operators/isfinite_v2_op.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" @@ -22,6 +23,12 @@ #include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/transform.h" +namespace paddle { +namespace framework { +class Tensor; +} // namespace framework +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/jit/gen/act.h b/paddle/fluid/operators/jit/gen/act.h index 585196e939c..cd360caa39b 100644 --- a/paddle/fluid/operators/jit/gen/act.h +++ b/paddle/fluid/operators/jit/gen/act.h @@ -15,6 +15,7 @@ #pragma once #include + #include "glog/logging.h" #include "paddle/fluid/operators/jit/gen/jitcode.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/jit/gen/blas.h b/paddle/fluid/operators/jit/gen/blas.h index ff4a13a3679..d3d9eddd2ee 100644 --- a/paddle/fluid/operators/jit/gen/blas.h +++ b/paddle/fluid/operators/jit/gen/blas.h @@ -15,6 +15,7 @@ #pragma once #include + #include "glog/logging.h" 
#include "paddle/fluid/operators/jit/gen/jitcode.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/jit/gen/embseqpool.cc b/paddle/fluid/operators/jit/gen/embseqpool.cc index 331a4b0d075..b4e63d87eac 100644 --- a/paddle/fluid/operators/jit/gen/embseqpool.cc +++ b/paddle/fluid/operators/jit/gen/embseqpool.cc @@ -13,10 +13,11 @@ * limitations under the License. */ #include "paddle/fluid/operators/jit/gen/embseqpool.h" + #include // offsetof #include #include -#include "paddle/fluid/operators/jit/gen/act.h" // for exp_float_consts ones + #include "paddle/fluid/operators/jit/registry.h" #include "paddle/fluid/platform/cpu_info.h" diff --git a/paddle/fluid/operators/jit/gen/embseqpool.h b/paddle/fluid/operators/jit/gen/embseqpool.h index 67a39350952..8353e5846f7 100644 --- a/paddle/fluid/operators/jit/gen/embseqpool.h +++ b/paddle/fluid/operators/jit/gen/embseqpool.h @@ -15,6 +15,7 @@ #pragma once #include + #include "glog/logging.h" #include "paddle/fluid/operators/jit/gen/jitcode.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/jit/gen/gru.cc b/paddle/fluid/operators/jit/gen/gru.cc index b5b0cffa806..fbdf49d5d58 100644 --- a/paddle/fluid/operators/jit/gen/gru.cc +++ b/paddle/fluid/operators/jit/gen/gru.cc @@ -13,8 +13,10 @@ * limitations under the License. 
*/ #include "paddle/fluid/operators/jit/gen/gru.h" + #include // offsetof #include + #include "paddle/fluid/operators/jit/registry.h" #include "paddle/fluid/platform/cpu_info.h" diff --git a/paddle/fluid/operators/jit/gen/gru.h b/paddle/fluid/operators/jit/gen/gru.h index e047a65cb4b..588d11820b2 100644 --- a/paddle/fluid/operators/jit/gen/gru.h +++ b/paddle/fluid/operators/jit/gen/gru.h @@ -15,6 +15,7 @@ #pragma once #include + #include "glog/logging.h" #include "paddle/fluid/operators/jit/gen/act.h" #include "paddle/fluid/operators/jit/gen/jitcode.h" diff --git a/paddle/fluid/operators/jit/gen/hopv.h b/paddle/fluid/operators/jit/gen/hopv.h index 575dec68a58..113c66a2558 100644 --- a/paddle/fluid/operators/jit/gen/hopv.h +++ b/paddle/fluid/operators/jit/gen/hopv.h @@ -15,6 +15,7 @@ #pragma once #include + #include "glog/logging.h" #include "paddle/fluid/operators/jit/gen/jitcode.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/jit/gen/lstm.cc b/paddle/fluid/operators/jit/gen/lstm.cc index 2c3bc985e9a..211dfc5ecad 100644 --- a/paddle/fluid/operators/jit/gen/lstm.cc +++ b/paddle/fluid/operators/jit/gen/lstm.cc @@ -13,8 +13,10 @@ * limitations under the License. 
*/ #include "paddle/fluid/operators/jit/gen/lstm.h" + #include // offsetof #include + #include "paddle/fluid/operators/jit/registry.h" #include "paddle/fluid/platform/cpu_info.h" diff --git a/paddle/fluid/operators/jit/gen/lstm.h b/paddle/fluid/operators/jit/gen/lstm.h index 6f232906569..c980670d482 100644 --- a/paddle/fluid/operators/jit/gen/lstm.h +++ b/paddle/fluid/operators/jit/gen/lstm.h @@ -15,6 +15,7 @@ #pragma once #include + #include "glog/logging.h" #include "paddle/fluid/operators/jit/gen/act.h" #include "paddle/fluid/operators/jit/gen/jitcode.h" diff --git a/paddle/fluid/operators/jit/gen/matmul.cc b/paddle/fluid/operators/jit/gen/matmul.cc index 3a455334f58..047d0d3e1ca 100644 --- a/paddle/fluid/operators/jit/gen/matmul.cc +++ b/paddle/fluid/operators/jit/gen/matmul.cc @@ -13,9 +13,10 @@ * limitations under the License. */ #include "paddle/fluid/operators/jit/gen/matmul.h" + #include // offsetof #include -#include + #include "paddle/fluid/operators/jit/registry.h" #include "paddle/fluid/platform/cpu_info.h" diff --git a/paddle/fluid/operators/jit/gen/matmul.h b/paddle/fluid/operators/jit/gen/matmul.h index 881cea581ac..4f04f7606d2 100644 --- a/paddle/fluid/operators/jit/gen/matmul.h +++ b/paddle/fluid/operators/jit/gen/matmul.h @@ -17,6 +17,7 @@ #include // for malloc and free #include #include + #include "glog/logging.h" #include "paddle/fluid/operators/jit/gen/jitcode.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/jit/gen/seqpool.h b/paddle/fluid/operators/jit/gen/seqpool.h index a0f1bb8299a..cb562c4c9a6 100644 --- a/paddle/fluid/operators/jit/gen/seqpool.h +++ b/paddle/fluid/operators/jit/gen/seqpool.h @@ -15,6 +15,7 @@ #pragma once #include + #include "glog/logging.h" #include "paddle/fluid/operators/jit/gen/jitcode.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/jit/gen/sgd.cc b/paddle/fluid/operators/jit/gen/sgd.cc index 40f8298af39..1452d4139b0 100644 --- 
a/paddle/fluid/operators/jit/gen/sgd.cc +++ b/paddle/fluid/operators/jit/gen/sgd.cc @@ -13,9 +13,10 @@ * limitations under the License. */ #include "paddle/fluid/operators/jit/gen/sgd.h" + #include // offsetof #include -#include + #include "paddle/fluid/operators/jit/registry.h" #include "paddle/fluid/platform/cpu_info.h" diff --git a/paddle/fluid/operators/jit/gen/sgd.h b/paddle/fluid/operators/jit/gen/sgd.h index 80b1809bbbf..403d97b8fec 100644 --- a/paddle/fluid/operators/jit/gen/sgd.h +++ b/paddle/fluid/operators/jit/gen/sgd.h @@ -15,6 +15,7 @@ #pragma once #include + #include "glog/logging.h" #include "paddle/fluid/operators/jit/gen/jitcode.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/jit/gen/vbroadcast.h b/paddle/fluid/operators/jit/gen/vbroadcast.h index 27c75f6f710..7d30fe5751b 100644 --- a/paddle/fluid/operators/jit/gen/vbroadcast.h +++ b/paddle/fluid/operators/jit/gen/vbroadcast.h @@ -15,6 +15,7 @@ #pragma once #include + #include "glog/logging.h" #include "paddle/fluid/operators/jit/gen/jitcode.h" diff --git a/paddle/fluid/operators/jit/gen_base.h b/paddle/fluid/operators/jit/gen_base.h index 033c603c07c..27b85763415 100644 --- a/paddle/fluid/operators/jit/gen_base.h +++ b/paddle/fluid/operators/jit/gen_base.h @@ -18,6 +18,7 @@ #include // for unique_ptr #include #include + #include "paddle/fluid/operators/jit/kernel_base.h" DECLARE_bool(dump_jitcode); diff --git a/paddle/fluid/operators/jit/helper.h b/paddle/fluid/operators/jit/helper.h index 39e5ee2be15..b6dd49b7772 100644 --- a/paddle/fluid/operators/jit/helper.h +++ b/paddle/fluid/operators/jit/helper.h @@ -21,6 +21,7 @@ #include #include // for std::move #include + #include "paddle/fluid/operators/jit/gen_base.h" #include "paddle/fluid/operators/jit/kernel_base.h" #include "paddle/fluid/operators/jit/kernel_key.h" @@ -31,6 +32,8 @@ namespace paddle { namespace operators { namespace jit { +class GenBase; + template inline typename std::enable_if< 
std::is_same::value && diff --git a/paddle/fluid/operators/jit/kernel_pool.cc b/paddle/fluid/operators/jit/kernel_pool.cc index f1719be9873..7b6b13c3d98 100644 --- a/paddle/fluid/operators/jit/kernel_pool.cc +++ b/paddle/fluid/operators/jit/kernel_pool.cc @@ -13,9 +13,6 @@ * limitations under the License. */ #include "paddle/fluid/operators/jit/kernel_pool.h" -#include // for shared_ptr -#include -#include namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/jit/kernel_pool.h b/paddle/fluid/operators/jit/kernel_pool.h index 48435cf6ef2..763be7d3cf5 100644 --- a/paddle/fluid/operators/jit/kernel_pool.h +++ b/paddle/fluid/operators/jit/kernel_pool.h @@ -20,6 +20,7 @@ #include #include // for move #include + #include "paddle/fluid/operators/jit/gen_base.h" #include "paddle/fluid/operators/jit/kernel_base.h" #include "paddle/fluid/operators/jit/kernel_key.h" @@ -29,6 +30,8 @@ namespace paddle { namespace operators { namespace jit { +struct KernelKey; + extern std::map>& GetJITCodesMap(); template diff --git a/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.h b/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.h index 49b1a1fea4b..6e0c972e27a 100644 --- a/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.h +++ b/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/operators/jit/kernel_base.h" namespace paddle { diff --git a/paddle/fluid/operators/jit/more/intrinsic/layer_norm.h b/paddle/fluid/operators/jit/more/intrinsic/layer_norm.h index 7b9f676050d..6a44bb25612 100644 --- a/paddle/fluid/operators/jit/more/intrinsic/layer_norm.h +++ b/paddle/fluid/operators/jit/more/intrinsic/layer_norm.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/operators/jit/kernel_base.h" namespace paddle { diff --git a/paddle/fluid/operators/jit/more/mix/mix.h b/paddle/fluid/operators/jit/more/mix/mix.h index 035425317ed..240cbbcda4f 100644 --- 
a/paddle/fluid/operators/jit/more/mix/mix.h +++ b/paddle/fluid/operators/jit/more/mix/mix.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/operators/jit/kernel_base.h" namespace paddle { diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.h b/paddle/fluid/operators/jit/more/mkl/mkl.h index b38cc107b8e..ee31c8df2f8 100644 --- a/paddle/fluid/operators/jit/more/mkl/mkl.h +++ b/paddle/fluid/operators/jit/more/mkl/mkl.h @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/operators/jit/kernel_base.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/jit/refer/refer.h b/paddle/fluid/operators/jit/refer/refer.h index 136b99e0aef..b8d5e2c2407 100644 --- a/paddle/fluid/operators/jit/refer/refer.h +++ b/paddle/fluid/operators/jit/refer/refer.h @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/operators/jit/helper.h" #include "paddle/fluid/operators/jit/kernel_base.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/label_smooth_op.cc b/paddle/fluid/operators/label_smooth_op.cc index ac0405b9a6e..231ff941278 100644 --- a/paddle/fluid/operators/label_smooth_op.cc +++ b/paddle/fluid/operators/label_smooth_op.cc @@ -13,9 +13,23 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/label_smooth_op.h" -#include + #include +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +class CPUDeviceContext; +struct CPUPlace; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/layer_norm_op.h b/paddle/fluid/operators/layer_norm_op.h index 6968c1a5b13..931cd6d1794 100644 --- a/paddle/fluid/operators/layer_norm_op.h +++ b/paddle/fluid/operators/layer_norm_op.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.cu.h" @@ -27,6 +28,14 @@ limitations under the License. */ #endif #include "paddle/fluid/operators/math/math_function.h" +namespace paddle { +namespace platform { +class CPUDeviceContext; +class CUDADeviceContext; +class DeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/lod_array_length_op.cc b/paddle/fluid/operators/lod_array_length_op.cc index 49e8cbbbaab..70da0149cad 100644 --- a/paddle/fluid/operators/lod_array_length_op.cc +++ b/paddle/fluid/operators/lod_array_length_op.cc @@ -12,9 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/op_registry.h" +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +class Scope; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/lod_rank_table_op.cc b/paddle/fluid/operators/lod_rank_table_op.cc index 7cbfbd03e1d..a9128b15bdb 100644 --- a/paddle/fluid/operators/lod_rank_table_op.cc +++ b/paddle/fluid/operators/lod_rank_table_op.cc @@ -11,8 +11,21 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/lod_rank_table.h" #include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +class Scope; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/lod_tensor_to_array_op.cc b/paddle/fluid/operators/lod_tensor_to_array_op.cc index b130e84933b..db07a104a89 100644 --- a/paddle/fluid/operators/lod_tensor_to_array_op.cc +++ b/paddle/fluid/operators/lod_tensor_to_array_op.cc @@ -11,14 +11,19 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include -#include -#include "paddle/fluid/framework/lod_rank_table.h" -#include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/concat_and_split.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/port.h" + +namespace paddle { +namespace framework { +class OpDesc; +class Scope; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/beam_search.cc b/paddle/fluid/operators/math/beam_search.cc index 550de1aadde..5271da91b8c 100644 --- a/paddle/fluid/operators/math/beam_search.cc +++ b/paddle/fluid/operators/math/beam_search.cc @@ -13,8 +13,16 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/beam_search.h" -#include -#include + +namespace paddle { +namespace framework { +class LoDTensor; +class Tensor; +} // namespace framework +namespace platform { +class CPUDeviceContext; +} // namespace platform +} // namespace paddle namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/beam_search_test.cc b/paddle/fluid/operators/math/beam_search_test.cc index 7ea8eb8b00d..97ce3d3f878 100644 --- a/paddle/fluid/operators/math/beam_search_test.cc +++ b/paddle/fluid/operators/math/beam_search_test.cc @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/math/beam_search.h" + #include -#include void PrepareCPUTensors(paddle::framework::LoDTensor* ids, paddle::framework::LoDTensor* scores, diff --git a/paddle/fluid/operators/math/blas.cc b/paddle/fluid/operators/math/blas.cc index 2a7ce83967f..3bc1b4f4048 100644 --- a/paddle/fluid/operators/math/blas.cc +++ b/paddle/fluid/operators/math/blas.cc @@ -13,8 +13,8 @@ // limitations under the License. #include "paddle/fluid/operators/math/blas.h" - #include + namespace paddle { namespace operators { namespace math { diff --git a/paddle/fluid/operators/math/blas.h b/paddle/fluid/operators/math/blas.h index 42a60e9220c..562e2de3bd3 100644 --- a/paddle/fluid/operators/math/blas.h +++ b/paddle/fluid/operators/math/blas.h @@ -17,6 +17,13 @@ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor.h" +namespace paddle { +namespace framework { +class ExecutionContext; +class Tensor; +} // namespace framework +} // namespace paddle + #ifdef PADDLE_WITH_MKLML #include "paddle/fluid/platform/dynload/mklml.h" #endif diff --git a/paddle/fluid/operators/math/blas_impl.h b/paddle/fluid/operators/math/blas_impl.h index 515d6a2435e..c53c453897f 100644 --- a/paddle/fluid/operators/math/blas_impl.h +++ b/paddle/fluid/operators/math/blas_impl.h @@ -695,9 +695,9 @@ void Blas::BatchedGEMM( CBLAS_TRANSPOSE transA, CBLAS_TRANSPOSE transB, int M, int N, int K, T alpha, const T **A, const T **B, T beta, T **C, int batchCount) const { #ifdef PADDLE_WITH_MKLML - const int lda = std::max((transA == CblasNoTrans) ? K : M, 1); - const int ldb = std::max((transB == CblasNoTrans) ? N : K, 1); - const int ldc = std::max(N, 1); + const int lda = (std::max)((transA == CblasNoTrans) ? K : M, 1); + const int ldb = (std::max)((transB == CblasNoTrans) ? 
N : K, 1); + const int ldc = (std::max)(N, 1); CBlas::GEMM_BATCH(CblasRowMajor, &transA, &transB, &M, &N, &K, &alpha, A, &lda, B, &ldb, &beta, C, &ldc, 1 /* group_count */, &batchCount); diff --git a/paddle/fluid/operators/math/concat_and_split.cc b/paddle/fluid/operators/math/concat_and_split.cc index b6bd58d118e..3b0c3c1686a 100644 --- a/paddle/fluid/operators/math/concat_and_split.cc +++ b/paddle/fluid/operators/math/concat_and_split.cc @@ -13,7 +13,17 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/concat_and_split.h" -#include + +namespace paddle { +namespace framework { +class Tensor; +} // namespace framework +namespace platform { +class CPUDeviceContext; +struct bfloat16; +struct float16; +} // namespace platform +} // namespace paddle namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/concat_test.cc b/paddle/fluid/operators/math/concat_test.cc index 270a9d3f80a..094e2059c4d 100644 --- a/paddle/fluid/operators/math/concat_test.cc +++ b/paddle/fluid/operators/math/concat_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include + #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/operators/math/concat_and_split.h" diff --git a/paddle/fluid/operators/math/context_project.cc b/paddle/fluid/operators/math/context_project.cc index 537d0b47868..927d610e2ce 100644 --- a/paddle/fluid/operators/math/context_project.cc +++ b/paddle/fluid/operators/math/context_project.cc @@ -14,6 +14,12 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/math/context_project.h" +namespace paddle { +namespace platform { +class CPUDeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { namespace math { diff --git a/paddle/fluid/operators/math/context_project.h b/paddle/fluid/operators/math/context_project.h index 051c6019d74..08bb555c593 100644 --- a/paddle/fluid/operators/math/context_project.h +++ b/paddle/fluid/operators/math/context_project.h @@ -16,8 +16,10 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/operators/math/blas.h" + #include "paddle/fluid/operators/math/im2col.h" namespace paddle { diff --git a/paddle/fluid/operators/math/cos_sim_functor.cc b/paddle/fluid/operators/math/cos_sim_functor.cc index cbe16999124..f7770050bee 100644 --- a/paddle/fluid/operators/math/cos_sim_functor.cc +++ b/paddle/fluid/operators/math/cos_sim_functor.cc @@ -14,6 +14,12 @@ limitations under the License. */ #include "paddle/fluid/operators/math/cos_sim_functor.h" +namespace paddle { +namespace platform { +class CPUDeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { namespace math { diff --git a/paddle/fluid/operators/math/cos_sim_functor.h b/paddle/fluid/operators/math/cos_sim_functor.h index d74662e68e7..9a24bfc3312 100644 --- a/paddle/fluid/operators/math/cos_sim_functor.h +++ b/paddle/fluid/operators/math/cos_sim_functor.h @@ -15,6 +15,7 @@ limitations under the License. 
*/ #pragma once #include #include + #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/hostdevice.h" @@ -43,6 +44,7 @@ struct CosSimFunctor { tep_x = x[i]; tep_y = y[i]; xx += tep_x * tep_x; + yy += tep_y * tep_y; xy += tep_x * tep_y; } diff --git a/paddle/fluid/operators/math/cpu_vec.h b/paddle/fluid/operators/math/cpu_vec.h index 925f3b6161a..eb7c622e596 100644 --- a/paddle/fluid/operators/math/cpu_vec.h +++ b/paddle/fluid/operators/math/cpu_vec.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/platform/cpu_info.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/math/cpu_vec_test.cc b/paddle/fluid/operators/math/cpu_vec_test.cc index 6490d81cec7..07fe9c30f39 100644 --- a/paddle/fluid/operators/math/cpu_vec_test.cc +++ b/paddle/fluid/operators/math/cpu_vec_test.cc @@ -16,12 +16,10 @@ limitations under the License. */ #include #include #include -#include "gflags/gflags.h" + #include "glog/logging.h" #include "gtest/gtest.h" - #include "paddle/fluid/operators/math/cpu_vec.h" -#include "paddle/fluid/platform/port.h" inline double GetCurrentUS() { struct timeval time; diff --git a/paddle/fluid/operators/math/cross_entropy.cc b/paddle/fluid/operators/math/cross_entropy.cc index 7a1ed47d182..23840143a44 100644 --- a/paddle/fluid/operators/math/cross_entropy.cc +++ b/paddle/fluid/operators/math/cross_entropy.cc @@ -14,6 +14,12 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/math/cross_entropy.h" +namespace paddle { +namespace platform { +class CPUDeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { namespace math { diff --git a/paddle/fluid/operators/math/gru_compute.cc b/paddle/fluid/operators/math/gru_compute.cc index 07c5cbf3337..4b8a6274cce 100644 --- a/paddle/fluid/operators/math/gru_compute.cc +++ b/paddle/fluid/operators/math/gru_compute.cc @@ -10,10 +10,17 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/gru_compute.h" + #include "paddle/fluid/operators/math/blas.h" #include "paddle/fluid/operators/math/detail/gru_cpu_kernel.h" #include "paddle/fluid/operators/math/detail/gru_kernel.h" +namespace paddle { +namespace platform { +class CPUDeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { namespace math { diff --git a/paddle/fluid/operators/math/im2col.cc b/paddle/fluid/operators/math/im2col.cc index 6fb393d791c..8efd35ca108 100644 --- a/paddle/fluid/operators/math/im2col.cc +++ b/paddle/fluid/operators/math/im2col.cc @@ -13,9 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/im2col.h" -#include + #include "paddle/fluid/operators/math/im2col_cfo_cpu.h" +namespace paddle { +namespace platform { +class CPUDeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { namespace math { diff --git a/paddle/fluid/operators/math/lstm_compute.cc b/paddle/fluid/operators/math/lstm_compute.cc index 94bbcbb5067..7e74f688019 100644 --- a/paddle/fluid/operators/math/lstm_compute.cc +++ b/paddle/fluid/operators/math/lstm_compute.cc @@ -13,9 +13,16 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/math/lstm_compute.h" + #include "paddle/fluid/operators/math/detail/lstm_cpu_kernel.h" #include "paddle/fluid/operators/math/detail/lstm_kernel.h" +namespace paddle { +namespace platform { +class CPUDeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { namespace math { diff --git a/paddle/fluid/operators/math/matrix_bit_code.cc b/paddle/fluid/operators/math/matrix_bit_code.cc index d6f51c6e5c6..7c50ba630db 100644 --- a/paddle/fluid/operators/math/matrix_bit_code.cc +++ b/paddle/fluid/operators/math/matrix_bit_code.cc @@ -13,8 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/matrix_bit_code.h" -#include -#include namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/matrix_inverse.cu.cc b/paddle/fluid/operators/math/matrix_inverse.cu.cc index 614f89a048c..950aed0aa49 100644 --- a/paddle/fluid/operators/math/matrix_inverse.cu.cc +++ b/paddle/fluid/operators/math/matrix_inverse.cu.cc @@ -12,14 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/operators/math/matrix_inverse.h" -#include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/math/blas.h" +namespace paddle { +namespace platform { +class CUDADeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { namespace math { +template +class MatrixInverseFunctor; + template class MatrixInverseFunctor { public: diff --git a/paddle/fluid/operators/math/pooling.cc b/paddle/fluid/operators/math/pooling.cc index 1e86c2e7a32..40cea7483f3 100644 --- a/paddle/fluid/operators/math/pooling.cc +++ b/paddle/fluid/operators/math/pooling.cc @@ -13,9 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/pooling.h" #include -#include -#include -#include "paddle/fluid/operators/math/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/sample_prob.cc b/paddle/fluid/operators/math/sample_prob.cc index 99aa318453e..16342493e45 100644 --- a/paddle/fluid/operators/math/sample_prob.cc +++ b/paddle/fluid/operators/math/sample_prob.cc @@ -14,6 +14,12 @@ limitations under the License. */ #include "paddle/fluid/operators/math/sample_prob.h" +namespace paddle { +namespace platform { +class CPUDeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { namespace math { diff --git a/paddle/fluid/operators/math/sample_prob.h b/paddle/fluid/operators/math/sample_prob.h index e5a6d84cb2b..7b08df660a0 100644 --- a/paddle/fluid/operators/math/sample_prob.h +++ b/paddle/fluid/operators/math/sample_prob.h @@ -16,10 +16,17 @@ limitations under the License. 
*/ #include #include #include + #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/eigen.h" + #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/operators/math/sampler.h" +namespace paddle { +namespace platform { +class CUDADeviceContext; +} // namespace platform +} // namespace paddle namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc b/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc index 81ad620466e..5cb1cc5dc03 100644 --- a/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc +++ b/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/selected_rows_functor.h" -#include #include "gtest/gtest.h" #include "paddle/fluid/operators/math/math_function.h" diff --git a/paddle/fluid/operators/math/sequence2batch.cc b/paddle/fluid/operators/math/sequence2batch.cc index 300a3692012..852700fa7ff 100644 --- a/paddle/fluid/operators/math/sequence2batch.cc +++ b/paddle/fluid/operators/math/sequence2batch.cc @@ -14,6 +14,12 @@ limitations under the License. */ #include "paddle/fluid/operators/math/sequence2batch.h" +namespace paddle { +namespace platform { +class CPUDeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { namespace math { diff --git a/paddle/fluid/operators/math/sequence_padding.cc b/paddle/fluid/operators/math/sequence_padding.cc index 076df017642..e29313e9f74 100644 --- a/paddle/fluid/operators/math/sequence_padding.cc +++ b/paddle/fluid/operators/math/sequence_padding.cc @@ -14,6 +14,16 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/math/sequence_padding.h" +namespace paddle { +namespace framework { +class LoDTensor; +class Tensor; +} // namespace framework +namespace platform { +class CPUDeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { namespace math { diff --git a/paddle/fluid/operators/math/sequence_padding_test.cc b/paddle/fluid/operators/math/sequence_padding_test.cc index eab4553ae8b..8892a17886a 100644 --- a/paddle/fluid/operators/math/sequence_padding_test.cc +++ b/paddle/fluid/operators/math/sequence_padding_test.cc @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/sequence_padding.h" + #include -#include template void TestSequencePadding(const DeviceContext &context, diff --git a/paddle/fluid/operators/math/sequence_scale.cc b/paddle/fluid/operators/math/sequence_scale.cc index ee5b22ca855..78cbdf311ad 100644 --- a/paddle/fluid/operators/math/sequence_scale.cc +++ b/paddle/fluid/operators/math/sequence_scale.cc @@ -14,6 +14,12 @@ limitations under the License. */ #include "paddle/fluid/operators/math/sequence_scale.h" +namespace paddle { +namespace framework { +class LoDTensor; +} // namespace framework +} // namespace paddle + namespace paddle { namespace operators { namespace math { diff --git a/paddle/fluid/operators/math/sequence_scale.h b/paddle/fluid/operators/math/sequence_scale.h index 202243985c1..d84513e024d 100644 --- a/paddle/fluid/operators/math/sequence_scale.h +++ b/paddle/fluid/operators/math/sequence_scale.h @@ -17,6 +17,12 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/platform/device_context.h" +namespace paddle { +namespace framework { +class LoDTensor; +} // namespace framework +} // namespace paddle + namespace paddle { namespace operators { namespace math { @@ -40,6 +46,7 @@ namespace math { * total_sequence_length is the sum of all sequences' * length. * \param scales Array. The i-th sequence will be scaled by scales[i]. + * \param num_seq Number of sequence * */ diff --git a/paddle/fluid/operators/math/vol2col.cc b/paddle/fluid/operators/math/vol2col.cc index 794fc647172..42bf1f471de 100644 --- a/paddle/fluid/operators/math/vol2col.cc +++ b/paddle/fluid/operators/math/vol2col.cc @@ -13,7 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/vol2col.h" -#include + +namespace paddle { +namespace platform { +class CPUDeviceContext; +} // namespace platform +} // namespace paddle namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/vol2col_test.cc b/paddle/fluid/operators/math/vol2col_test.cc index aa979c4f109..6ed5a0943eb 100644 --- a/paddle/fluid/operators/math/vol2col_test.cc +++ b/paddle/fluid/operators/math/vol2col_test.cc @@ -13,9 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/math/vol2col.h" + #include -#include -#include template void testVol2col() { diff --git a/paddle/fluid/operators/matmul_v2_op.h b/paddle/fluid/operators/matmul_v2_op.h index dc83e4d9648..8cd4fa12be4 100644 --- a/paddle/fluid/operators/matmul_v2_op.h +++ b/paddle/fluid/operators/matmul_v2_op.h @@ -65,7 +65,7 @@ static void GetBroadcastFromDims(const int x_ndim, const std::int64_t* x_dims, std::int64_t* x_bd_dims, std::int64_t* y_bd_dims, std::int64_t* out_bd_dims) { - const int ndim = std::max(x_ndim, y_ndim); + const int ndim = (std::max)(x_ndim, y_ndim); std::fill(x_bd_dims, x_bd_dims + ndim - x_ndim, 1); std::fill(y_bd_dims, y_bd_dims + ndim - y_ndim, 1); std::copy(x_dims, x_dims + x_ndim, x_bd_dims + ndim - x_ndim); @@ -79,7 +79,7 @@ static void GetBroadcastFromDims(const int x_ndim, const std::int64_t* x_dims, if (x_bd_dims[i] == 0 || y_bd_dims[i] == 0) { out_bd_dims[i] = 0; } else { - out_bd_dims[i] = std::max(x_bd_dims[i], y_bd_dims[i]); + out_bd_dims[i] = (std::max)(x_bd_dims[i], y_bd_dims[i]); } } } @@ -229,7 +229,7 @@ void MatMulFunction(const Tensor* X, const Tensor* Y, "Input(X) has error dim.")); } const int N = trans_y ? y_dims[y_ndim - 2] : y_dims[y_ndim - 1]; - const int ndim = std::max(x_ndim, y_ndim); + const int ndim = (std::max)(x_ndim, y_ndim); std::vector x_broadcast_dims(ndim); std::vector y_broadcast_dims(ndim); std::vector out_broadcast_dims(ndim); diff --git a/paddle/fluid/operators/max_sequence_len_op.cc b/paddle/fluid/operators/max_sequence_len_op.cc index b47ec8bc70a..4f73de086f2 100644 --- a/paddle/fluid/operators/max_sequence_len_op.cc +++ b/paddle/fluid/operators/max_sequence_len_op.cc @@ -12,10 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/framework/lod_rank_table.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +class Scope; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/merge_lod_tensor_op.cc b/paddle/fluid/operators/merge_lod_tensor_op.cc index 87d914aa797..584de34c5d3 100644 --- a/paddle/fluid/operators/merge_lod_tensor_op.cc +++ b/paddle/fluid/operators/merge_lod_tensor_op.cc @@ -13,7 +13,18 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/memory/memcpy.h" + +namespace paddle { +namespace framework { +class InferShapeContext; +class LoDTensor; +class OpDesc; +class Scope; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc index 487deb11b48..aecf67fc3bb 100644 --- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc @@ -15,6 +15,15 @@ #include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/platform/mkldnn_reuse.h" +namespace paddle { +namespace framework { +class Tensor; +} // namespace framework +namespace platform { +class MKLDNNDeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc index 8a02a697cbb..98f368aa7a9 100644 --- 
a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc @@ -12,10 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "mkldnn.hpp" #include "paddle/fluid/operators/batch_norm_op.h" #include "paddle/fluid/platform/mkldnn_reuse.h" +namespace paddle { +namespace framework { +class Tensor; +} // namespace framework +namespace platform { +class MKLDNNDeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc index 19ee8764e27..a6cda154e55 100644 --- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc @@ -12,12 +12,16 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include #include "paddle/fluid/framework/data_layout_transform.h" -#include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/conv_op.h" #include "paddle/fluid/platform/mkldnn_reuse.h" +namespace paddle { +namespace platform { +class MKLDNNDeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc index 40737f4cd02..0bec5619f54 100644 --- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc @@ -12,13 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include #include -#include "paddle/fluid/framework/tensor.h" + #include "paddle/fluid/operators/fc_op.h" -#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/fluid/platform/variant.h" + +namespace paddle { +namespace framework { +class LoDTensor; +class Tensor; +} // namespace framework +namespace platform { +class MKLDNNDeviceContext; +} // namespace platform +} // namespace paddle namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc index 00c10cecbf4..9ee653ec589 100644 --- a/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc @@ -12,10 +12,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/operators/lrn_op.h" #include "paddle/fluid/platform/mkldnn_reuse.h" +namespace paddle { +namespace framework { +class Tensor; +} // namespace framework +namespace platform { +class MKLDNNDeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc index 5ca0ed1182e..3ae34fe0e90 100644 --- a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc @@ -12,12 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "mkldnn.hpp" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/operators/math/blas.h" #include "paddle/fluid/platform/mkldnn_helper.h" +namespace paddle { +namespace platform { +class MKLDNNDeviceContext; +struct CPUPlace; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc index 1dd1ad11786..4f0b7cab47e 100644 --- a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc @@ -13,12 +13,18 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include -#include "paddle/fluid/framework/data_layout_transform.h" -#include "paddle/fluid/memory/malloc.h" + #include "paddle/fluid/operators/mul_op.h" #include "paddle/fluid/platform/mkldnn_helper.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" + +namespace paddle { +namespace framework { +class Tensor; +} // namespace framework +namespace platform { +class MKLDNNDeviceContext; +} // namespace platform +} // namespace paddle namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc index 9df30b3295c..bf12c61a4d9 100644 --- a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/operators/pool_op.h" #include "paddle/fluid/platform/mkldnn_helper.h" #include "paddle/fluid/platform/mkldnn_reuse.h" diff --git a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc index 5014381a4e2..0b159f9dcfa 100644 --- a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc @@ -12,12 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include -#include -#include "mkldnn.hpp" #include "paddle/fluid/operators/softmax_op.h" #include "paddle/fluid/platform/mkldnn_reuse.h" +namespace paddle { +namespace framework { +class Tensor; +} // namespace framework +namespace platform { +class MKLDNNDeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc index 1e0e13abb7c..414312fe97e 100644 --- a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc @@ -24,13 +24,19 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "mkldnn.hpp" -#include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/operators/math/selected_rows_functor.h" #include "paddle/fluid/operators/sum_op.h" -#include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/mkldnn_helper.h" +namespace paddle { +namespace framework { +class Tensor; +} // namespace framework +namespace platform { +class CPUDeviceContext; +class MKLDNNDeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/nccl/nccl_gpu_common.cc b/paddle/fluid/operators/nccl/nccl_gpu_common.cc index 70d80e26e5c..169af47e95a 100644 --- a/paddle/fluid/operators/nccl/nccl_gpu_common.cc +++ b/paddle/fluid/operators/nccl/nccl_gpu_common.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/nccl/nccl_gpu_common.h" -#include "paddle/fluid/platform/gpu_info.h" namespace paddle { namespace platform { diff --git a/paddle/fluid/operators/op_debug_string_test.cc b/paddle/fluid/operators/op_debug_string_test.cc index 67d71fe82ad..7c1cf9109c5 100644 --- a/paddle/fluid/operators/op_debug_string_test.cc +++ b/paddle/fluid/operators/op_debug_string_test.cc @@ -13,10 +13,10 @@ // limitations under the License. #include + #include "glog/logging.h" #include "gtest/gtest.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" USE_OP(elementwise_add_grad); diff --git a/paddle/fluid/operators/print_op.cc b/paddle/fluid/operators/print_op.cc index 3fb18365e52..80faf833be5 100644 --- a/paddle/fluid/operators/print_op.cc +++ b/paddle/fluid/operators/print_op.cc @@ -12,13 +12,21 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -#include -#include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/var_type.h" -#include "paddle/fluid/operators/assign_op.h" #include "paddle/fluid/operators/tensor_formatter.h" +namespace paddle { +namespace framework { +class InferShapeContext; +class LoDTensor; +class OpDesc; +class Scope; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle + namespace paddle { namespace operators { using framework::GradVarName; diff --git a/paddle/fluid/operators/rank_loss_op.cc b/paddle/fluid/operators/rank_loss_op.cc index 9b7a923fb4b..ec9d1fde453 100644 --- a/paddle/fluid/operators/rank_loss_op.cc +++ b/paddle/fluid/operators/rank_loss_op.cc @@ -13,9 +13,23 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/rank_loss_op.h" -#include + #include +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +class CPUDeviceContext; +struct CPUPlace; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reader/buffered_reader.h b/paddle/fluid/operators/reader/buffered_reader.h index 42c087b9e47..041d36a9343 100644 --- a/paddle/fluid/operators/reader/buffered_reader.h +++ b/paddle/fluid/operators/reader/buffered_reader.h @@ -18,6 +18,7 @@ #include #include #include + #include "ThreadPool.h" #include "paddle/fluid/framework/reader.h" #ifdef PADDLE_WITH_CUDA diff --git a/paddle/fluid/operators/reader/py_reader.cc b/paddle/fluid/operators/reader/py_reader.cc index 2100aeb7cf4..ad79f6bbc4c 100644 --- a/paddle/fluid/operators/reader/py_reader.cc +++ b/paddle/fluid/operators/reader/py_reader.cc @@ -13,7 +13,6 @@ // limitations under the License. 
#include "paddle/fluid/operators/reader/py_reader.h" -#include namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reader/py_reader.h b/paddle/fluid/operators/reader/py_reader.h index d827cdd0b87..3492d578048 100644 --- a/paddle/fluid/operators/reader/py_reader.h +++ b/paddle/fluid/operators/reader/py_reader.h @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/framework/reader.h" #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" @@ -24,6 +25,8 @@ namespace paddle { namespace operators { namespace reader { +class LoDTensorBlockingQueue; + class PyReader : public framework::FileReader { public: explicit PyReader( diff --git a/paddle/fluid/operators/reader/reader_blocking_queue_test.cc b/paddle/fluid/operators/reader/reader_blocking_queue_test.cc index dc0940ac0b7..86c9f38ad3f 100644 --- a/paddle/fluid/operators/reader/reader_blocking_queue_test.cc +++ b/paddle/fluid/operators/reader/reader_blocking_queue_test.cc @@ -12,12 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include // NOLINT -#include #include // NOLINT -#include -#include "gtest/gtest.h" +#include "gtest/gtest.h" #include "paddle/fluid/operators/reader/blocking_queue.h" using paddle::operators::reader::BlockingQueue; diff --git a/paddle/fluid/operators/reader/reader_op_registry.cc b/paddle/fluid/operators/reader/reader_op_registry.cc index 952ed466288..bccaae70229 100644 --- a/paddle/fluid/operators/reader/reader_op_registry.cc +++ b/paddle/fluid/operators/reader/reader_op_registry.cc @@ -13,8 +13,12 @@ // limitations under the License. 
#include "paddle/fluid/operators/reader/reader_op_registry.h" -#include -#include + +namespace paddle { +namespace framework { +class VarDesc; +} // namespace framework +} // namespace paddle namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reader/reader_op_registry.h b/paddle/fluid/operators/reader/reader_op_registry.h index 35a0dacaec2..fec496446ac 100644 --- a/paddle/fluid/operators/reader/reader_op_registry.h +++ b/paddle/fluid/operators/reader/reader_op_registry.h @@ -18,9 +18,17 @@ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/reader.h" +namespace paddle { +namespace framework { +class InferShapeContext; +class ReaderBase; +} // namespace framework +} // namespace paddle + namespace paddle { namespace operators { namespace reader { diff --git a/paddle/fluid/operators/recurrent_op.cc b/paddle/fluid/operators/recurrent_op.cc index c1adaf2037a..35f52ffa522 100644 --- a/paddle/fluid/operators/recurrent_op.cc +++ b/paddle/fluid/operators/recurrent_op.cc @@ -13,9 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/recurrent_op.h" - #include -#include "paddle/fluid/string/string_helper.h" + +namespace paddle { +namespace framework { +class InferShapeContext; +class LoDTensor; +class OpDesc; +} // namespace framework +} // namespace paddle namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/recurrent_op.h b/paddle/fluid/operators/recurrent_op.h index 1ca66527e1b..e3f512d45c0 100644 --- a/paddle/fluid/operators/recurrent_op.h +++ b/paddle/fluid/operators/recurrent_op.h @@ -22,6 +22,12 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/op_registry.h" +namespace paddle { +namespace platform { +class DeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reduce_ops/frobenius_norm_op.cc b/paddle/fluid/operators/reduce_ops/frobenius_norm_op.cc index 57df2664824..2d7cce68e81 100644 --- a/paddle/fluid/operators/reduce_ops/frobenius_norm_op.cc +++ b/paddle/fluid/operators/reduce_ops/frobenius_norm_op.cc @@ -13,9 +13,22 @@ // limitations under the License. #include "paddle/fluid/operators/reduce_ops/frobenius_norm_op.h" -#include + #include +namespace paddle { +namespace framework { +class OpDesc; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +class CPUDeviceContext; +struct CPUPlace; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reduce_ops/reduce_all_op.cc b/paddle/fluid/operators/reduce_ops/reduce_all_op.cc index 30265b3cc71..10095bc9550 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_all_op.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_all_op.cc @@ -14,6 +14,21 @@ #include "paddle/fluid/operators/reduce_ops/reduce_all_op.h" +namespace paddle { +namespace framework { +class OpDesc; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +class CPUDeviceContext; +struct CPUPlace; +} // namespace platform +} // namespace paddle + // kernel's device type is decided by input tensor place, to be consistent with // compare and logical ops REGISTER_REDUCE_OP_WITHOUT_GRAD(reduce_all, UseInputPlace); diff --git a/paddle/fluid/operators/reduce_ops/reduce_any_op.cc b/paddle/fluid/operators/reduce_ops/reduce_any_op.cc index cbc18f18b8e..f288fce7538 100644 --- 
a/paddle/fluid/operators/reduce_ops/reduce_any_op.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_any_op.cc @@ -14,6 +14,21 @@ #include "paddle/fluid/operators/reduce_ops/reduce_any_op.h" +namespace paddle { +namespace framework { +class OpDesc; +template +class EmptyGradOpMaker; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +class CPUDeviceContext; +struct CPUPlace; +} // namespace platform +} // namespace paddle + // kernel's device type is decided by input tensor place, to be consistent with // compare and logical ops REGISTER_REDUCE_OP_WITHOUT_GRAD(reduce_any, UseInputPlace); diff --git a/paddle/fluid/operators/reduce_ops/reduce_prod_op.cc b/paddle/fluid/operators/reduce_ops/reduce_prod_op.cc index 88935107df1..f27cd6b125b 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_prod_op.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_prod_op.cc @@ -14,6 +14,19 @@ #include "paddle/fluid/operators/reduce_ops/reduce_prod_op.h" +namespace paddle { +namespace framework { +class OpDesc; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +class CPUDeviceContext; +struct CPUPlace; +} // namespace platform +} // namespace paddle + REGISTER_REDUCE_OP(reduce_prod); REGISTER_OP_CPU_KERNEL(reduce_prod, ops::ReduceKernel + #include +namespace paddle { +namespace framework { +class OpDesc; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +class CPUDeviceContext; +struct CPUPlace; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc b/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc index cb1a2962d9b..d8d4e641aeb 100644 --- a/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc +++ b/paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc @@ -12,10 +12,21 @@ WITHOUT 
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/lod_rank_table.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device_context.h" +namespace paddle { +namespace framework { +class LoDRankTable; +class LoDTensor; +class OpDesc; +class Scope; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc index 01a33a46521..e03824ca8c3 100644 --- a/paddle/fluid/operators/reshape_op.cc +++ b/paddle/fluid/operators/reshape_op.cc @@ -13,9 +13,24 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include + #include "paddle/fluid/framework/op_registry.h" +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +struct CPUPlace; +struct CUDAPlace; +struct float16; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/rnn_memory_helper_op.cc b/paddle/fluid/operators/rnn_memory_helper_op.cc index eea2d2ac57a..95b23a0b8cc 100644 --- a/paddle/fluid/operators/rnn_memory_helper_op.cc +++ b/paddle/fluid/operators/rnn_memory_helper_op.cc @@ -15,6 +15,17 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +class Scope; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle + namespace paddle { namespace operators { class RNNMemoryHelperOp : public framework::OperatorBase { diff --git a/paddle/fluid/operators/scale_op.cc b/paddle/fluid/operators/scale_op.cc index 9d51f3e292f..55e35e43eb9 100644 --- a/paddle/fluid/operators/scale_op.cc +++ b/paddle/fluid/operators/scale_op.cc @@ -14,9 +14,21 @@ limitations under the License. */ #include "paddle/fluid/operators/scale_op.h" -#include #include +namespace paddle { +namespace framework { +class InferShapeContext; +class OpDesc; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +namespace platform { +class CPUDeviceContext; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/scatter_test.cc b/paddle/fluid/operators/scatter_test.cc index eb248e59b6c..c83726180ba 100644 --- a/paddle/fluid/operators/scatter_test.cc +++ b/paddle/fluid/operators/scatter_test.cc @@ -13,10 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/scatter.h" + #include -#include -#include -#include "paddle/fluid/framework/ddim.h" + #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/operators/sequence_ops/sequence_concat_op.cu.cc b/paddle/fluid/operators/sequence_ops/sequence_concat_op.cu.cc index 43fd84a711f..6eda8595b17 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_concat_op.cu.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_concat_op.cu.cc @@ -13,6 +13,13 @@ // limitations under the License. 
#include "paddle/fluid/operators/sequence_ops/sequence_concat_op.h" +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace platform { +class CUDADeviceContext; +} // namespace platform +} // namespace paddle template using Kernel = diff --git a/paddle/fluid/operators/softmax_cudnn_op.cu.cc b/paddle/fluid/operators/softmax_cudnn_op.cu.cc index 94e54266f0f..5b857960706 100644 --- a/paddle/fluid/operators/softmax_cudnn_op.cu.cc +++ b/paddle/fluid/operators/softmax_cudnn_op.cu.cc @@ -14,9 +14,15 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/softmax_op.h" -#include "paddle/fluid/platform/cudnn_desc.h" #include "paddle/fluid/platform/cudnn_helper.h" +namespace paddle { +namespace platform { +struct CUDAPlace; +struct float16; +} // namespace platform +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/split_lod_tensor_op.cc b/paddle/fluid/operators/split_lod_tensor_op.cc index aa8c0b13dbb..4adbbacc844 100644 --- a/paddle/fluid/operators/split_lod_tensor_op.cc +++ b/paddle/fluid/operators/split_lod_tensor_op.cc @@ -13,9 +13,20 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/device_context.h" +namespace paddle { +namespace framework { +class InferShapeContext; +class LoDTensor; +class OpDesc; +class Scope; +} // namespace framework +namespace imperative { +class OpBase; +} // namespace imperative +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/strided_memcpy.h b/paddle/fluid/operators/strided_memcpy.h index 142b00b4de6..48d6cf8b361 100644 --- a/paddle/fluid/operators/strided_memcpy.h +++ b/paddle/fluid/operators/strided_memcpy.h @@ -11,8 +11,10 @@ limitations under the License. 
*/ #pragma once #include + #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/operators/detail/strided_memcpy.h" + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/strided_memcpy_test.cc b/paddle/fluid/operators/strided_memcpy_test.cc index 3a450773a9d..83480b44d5b 100644 --- a/paddle/fluid/operators/strided_memcpy_test.cc +++ b/paddle/fluid/operators/strided_memcpy_test.cc @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/strided_memcpy.h" + #include "gtest/gtest.h" -#include "paddle/fluid/memory/memory.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/tensor_formatter.cc b/paddle/fluid/operators/tensor_formatter.cc index 7b8b484a11e..e4fa4a96a5c 100644 --- a/paddle/fluid/operators/tensor_formatter.cc +++ b/paddle/fluid/operators/tensor_formatter.cc @@ -12,11 +12,10 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/fluid/operators/tensor_formatter.h" #include #include -#include "paddle/fluid/operators/tensor_formatter.h" - namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/tensor_formatter.h b/paddle/fluid/operators/tensor_formatter.h index 1731348479d..aee5eec0d1c 100644 --- a/paddle/fluid/operators/tensor_formatter.h +++ b/paddle/fluid/operators/tensor_formatter.h @@ -18,6 +18,12 @@ #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/var_type.h" +namespace paddle { +namespace framework { +class LoDTensor; +} // namespace framework +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc index 708fccf9715..d4eb79aa0f2 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc @@ -14,9 +14,6 @@ #ifdef PADDLE_WITH_CUDA -#include -#include - #include "paddle/fluid/operators/tensorrt/tensorrt_engine_op.h" namespace paddle { diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index 9cfe47da5db..922340b08c6 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -31,6 +31,18 @@ #include "paddle/fluid/inference/tensorrt/engine.h" #include "paddle/fluid/inference/tensorrt/helper.h" +namespace paddle { +namespace inference { +namespace tensorrt { +class TRTCalibratorEngine; +class TRTCalibratorEngineManager; +class TRTInt8Calibrator; +} // namespace tensorrt +template +struct Singleton; +} // namespace inference +} // namespace paddle + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.h b/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.h index cc2fe4cdbdb..ce94ba1ce9e 100644 --- 
a/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.h +++ b/paddle/fluid/operators/test_leaky_relu_grad_grad_functor.h @@ -16,6 +16,7 @@ #include #include + #include "gtest/gtest.h" #include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/platform/for_range.h" diff --git a/paddle/fluid/platform/bfloat16.h b/paddle/fluid/platform/bfloat16.h index 742329abb2d..4460139219f 100644 --- a/paddle/fluid/platform/bfloat16.h +++ b/paddle/fluid/platform/bfloat16.h @@ -23,9 +23,15 @@ #endif #include + #include "paddle/fluid/platform/hostdevice.h" #include "unsupported/Eigen/CXX11/Tensor" +namespace Eigen { +template +struct NumTraits; +} // namespace Eigen + namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/bfloat16_test.cc b/paddle/fluid/platform/bfloat16_test.cc index bdb508ee336..fc964d7df35 100644 --- a/paddle/fluid/platform/bfloat16_test.cc +++ b/paddle/fluid/platform/bfloat16_test.cc @@ -10,14 +10,12 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/platform/bfloat16.h" - #include #define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h #include "gtest/gtest.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/init.h" namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/collective_helper.cc b/paddle/fluid/platform/collective_helper.cc index 4cb6ee3143a..54dac976276 100644 --- a/paddle/fluid/platform/collective_helper.cc +++ b/paddle/fluid/platform/collective_helper.cc @@ -14,12 +14,8 @@ #if defined(PADDLE_WITH_NCCL) #include "paddle/fluid/platform/collective_helper.h" - -#include #include -#include "paddle/fluid/platform/dynload/nccl.h" - namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/cpu_info.cc b/paddle/fluid/platform/cpu_info.cc index b86fd70c9ae..e379832593c 100644 --- a/paddle/fluid/platform/cpu_info.cc +++ b/paddle/fluid/platform/cpu_info.cc @@ -15,7 +15,6 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/cpu_info.h" #ifdef PADDLE_WITH_XBYAK -#include "xbyak/xbyak.h" #include "xbyak/xbyak_util.h" #endif diff --git a/paddle/fluid/platform/cuda_resource_pool.h b/paddle/fluid/platform/cuda_resource_pool.h index 22b53445d84..570b68b08fc 100644 --- a/paddle/fluid/platform/cuda_resource_pool.h +++ b/paddle/fluid/platform/cuda_resource_pool.h @@ -20,6 +20,7 @@ #include #include #include + #include "paddle/fluid/platform/resource_pool.h" namespace paddle { diff --git a/paddle/fluid/platform/cudnn_desc.h b/paddle/fluid/platform/cudnn_desc.h index e0ba1aaa6bd..0e0218dcca3 100644 --- a/paddle/fluid/platform/cudnn_desc.h +++ b/paddle/fluid/platform/cudnn_desc.h @@ -22,8 +22,15 @@ #include #include #include + #include "paddle/fluid/platform/cudnn_helper.h" +namespace paddle { +namespace framework { +class Tensor; +} // namespace framework +} // namespace paddle + namespace paddle { namespace platform { using framework::Tensor; diff --git a/paddle/fluid/platform/cudnn_helper.h b/paddle/fluid/platform/cudnn_helper.h index 4b9c5c429da..e983e368953 100644 --- a/paddle/fluid/platform/cudnn_helper.h +++ b/paddle/fluid/platform/cudnn_helper.h @@ -23,6 +23,12 @@ limitations under the License. */ #include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/macros.h" +namespace paddle { +namespace platform { +struct float16; +} // namespace platform +} // namespace paddle + DECLARE_bool(cudnn_deterministic); namespace paddle { diff --git a/paddle/fluid/platform/device_code.cc b/paddle/fluid/platform/device_code.cc index 9d5a0954b00..2474903edf7 100644 --- a/paddle/fluid/platform/device_code.cc +++ b/paddle/fluid/platform/device_code.cc @@ -13,10 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/platform/device_code.h" + #include #include #include #include + #include "paddle/fluid/platform/enforce.h" DECLARE_string(cuda_dir); diff --git a/paddle/fluid/platform/device_code.h b/paddle/fluid/platform/device_code.h index 6128d8b78db..4199317a8ce 100644 --- a/paddle/fluid/platform/device_code.h +++ b/paddle/fluid/platform/device_code.h @@ -19,6 +19,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/platform/device_context.h" #ifdef PADDLE_WITH_CUDA #include "paddle/fluid/platform/dynload/cuda_driver.h" diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 28d94627f95..e1438a1eefa 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/memory/malloc.h" #ifdef PADDLE_WITH_CUDA #include "paddle/fluid/platform/cuda_helper.h" @@ -35,6 +36,7 @@ limitations under the License. */ #endif #include + #include "glog/logging.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" @@ -44,6 +46,11 @@ limitations under the License. */ #define EIGEN_USE_THREADS #include "unsupported/Eigen/CXX11/Tensor" +namespace Eigen { +struct DefaultDevice; +struct GpuDevice; +} // namespace Eigen + #ifdef PADDLE_WITH_XPU #include "paddle/fluid/platform/xpu_header.h" #endif @@ -118,8 +125,8 @@ struct DefaultDeviceContextType { #ifdef PADDLE_WITH_CUDA -class EigenCudaStreamDevice; class CudnnWorkspaceHandle; +class EigenCudaStreamDevice; class CUDAContext { public: diff --git a/paddle/fluid/platform/device_memory_aligment.h b/paddle/fluid/platform/device_memory_aligment.h index 2c19a2b1062..5cc33fd31f1 100644 --- a/paddle/fluid/platform/device_memory_aligment.h +++ b/paddle/fluid/platform/device_memory_aligment.h @@ -14,6 +14,7 @@ limitations under the License. 
*/ #pragma once #include + #include "paddle/fluid/platform/cpu_info.h" #include "paddle/fluid/platform/place.h" #ifdef PADDLE_WITH_CUDA diff --git a/paddle/fluid/platform/device_tracer.h b/paddle/fluid/platform/device_tracer.h index 85168a046fb..9bae7a87052 100644 --- a/paddle/fluid/platform/device_tracer.h +++ b/paddle/fluid/platform/device_tracer.h @@ -28,6 +28,8 @@ namespace platform { /////////////////////// // WARN: Under Development. Don't depend on it yet. ////////////////////// +class Event; + inline uint64_t PosixInNsec() { struct timeval tv; gettimeofday(&tv, nullptr); diff --git a/paddle/fluid/platform/dynload/cublas.h b/paddle/fluid/platform/dynload/cublas.h index 562e7542012..045caab7488 100644 --- a/paddle/fluid/platform/dynload/cublas.h +++ b/paddle/fluid/platform/dynload/cublas.h @@ -19,6 +19,7 @@ limitations under the License. */ #include #include // NOLINT #include + #include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/fluid/platform/port.h" diff --git a/paddle/fluid/platform/dynload/cuda_driver.h b/paddle/fluid/platform/dynload/cuda_driver.h index 056fcc069db..5799b084f5f 100644 --- a/paddle/fluid/platform/dynload/cuda_driver.h +++ b/paddle/fluid/platform/dynload/cuda_driver.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include // NOLINT + #include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/fluid/platform/port.h" diff --git a/paddle/fluid/platform/dynload/cudnn.h b/paddle/fluid/platform/dynload/cudnn.h index dd0a2e19685..88b545b48e5 100644 --- a/paddle/fluid/platform/dynload/cudnn.h +++ b/paddle/fluid/platform/dynload/cudnn.h @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once -#include - #include +#include #include // NOLINT + #include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/fluid/platform/port.h" diff --git a/paddle/fluid/platform/dynload/cupti.cc b/paddle/fluid/platform/dynload/cupti.cc index a25660c6ed4..d8381580c90 100644 --- a/paddle/fluid/platform/dynload/cupti.cc +++ b/paddle/fluid/platform/dynload/cupti.cc @@ -15,7 +15,6 @@ limitations under the License. */ #ifdef PADDLE_WITH_CUPTI #include "paddle/fluid/platform/dynload/cupti.h" -#include "paddle/fluid/platform/enforce.h" namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/dynload/curand.h b/paddle/fluid/platform/dynload/curand.h index 48076e5478a..7a160664bc2 100644 --- a/paddle/fluid/platform/dynload/curand.h +++ b/paddle/fluid/platform/dynload/curand.h @@ -14,11 +14,10 @@ limitations under the License. */ #pragma once #include - #include // NOLINT -#include "paddle/fluid/platform/port.h" #include "paddle/fluid/platform/dynload/dynamic_loader.h" +#include "paddle/fluid/platform/port.h" namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/dynload/cusolver.h b/paddle/fluid/platform/dynload/cusolver.h index ba2d38729c1..561f20af45a 100644 --- a/paddle/fluid/platform/dynload/cusolver.h +++ b/paddle/fluid/platform/dynload/cusolver.h @@ -15,11 +15,10 @@ limitations under the License. */ #include #include - #include // NOLINT -#include "paddle/fluid/platform/port.h" #include "paddle/fluid/platform/dynload/dynamic_loader.h" +#include "paddle/fluid/platform/port.h" namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/dynload/dynamic_loader.cc b/paddle/fluid/platform/dynload/dynamic_loader.cc index 82e4f6ac75e..0c8a64ccf69 100644 --- a/paddle/fluid/platform/dynload/dynamic_loader.cc +++ b/paddle/fluid/platform/dynload/dynamic_loader.cc @@ -13,8 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/platform/dynload/dynamic_loader.h" -#include -#include // NOLINT #include #include @@ -22,7 +20,6 @@ limitations under the License. */ #include "glog/logging.h" #include "paddle/fluid/platform/dynload/cupti_lib_path.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/port.h" DEFINE_string(cudnn_dir, "", "Specify path for loading libcudnn.so. For instance, " diff --git a/paddle/fluid/platform/dynload/mklml.h b/paddle/fluid/platform/dynload/mklml.h index 2be95b113b2..9369cf131da 100644 --- a/paddle/fluid/platform/dynload/mklml.h +++ b/paddle/fluid/platform/dynload/mklml.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include // NOLINT + #include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/fluid/platform/port.h" diff --git a/paddle/fluid/platform/dynload/nccl.h b/paddle/fluid/platform/dynload/nccl.h index 1d5fa45ecf6..407f34f0ac3 100644 --- a/paddle/fluid/platform/dynload/nccl.h +++ b/paddle/fluid/platform/dynload/nccl.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once #include - #include // NOLINT + #include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/fluid/platform/port.h" diff --git a/paddle/fluid/platform/dynload/nvrtc.h b/paddle/fluid/platform/dynload/nvrtc.h index 9464a23ba1e..720450d28b1 100644 --- a/paddle/fluid/platform/dynload/nvrtc.h +++ b/paddle/fluid/platform/dynload/nvrtc.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include // NOLINT + #include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/fluid/platform/port.h" diff --git a/paddle/fluid/platform/dynload/warpctc.h b/paddle/fluid/platform/dynload/warpctc.h index bc1977b05de..e10a7233b62 100644 --- a/paddle/fluid/platform/dynload/warpctc.h +++ b/paddle/fluid/platform/dynload/warpctc.h @@ -15,6 +15,7 @@ limitations under the License. 
*/ #pragma once #include // NOLINT + #include "paddle/fluid/platform/dynload/dynamic_loader.h" #include "paddle/fluid/platform/port.h" #include "warpctc/include/ctc.h" diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index ce1ec507307..a3ae9e48eea 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -70,6 +70,12 @@ limitations under the License. */ #include "paddle/fluid/framework/type_defs.h" #include "paddle/fluid/imperative/type_defs.h" +namespace paddle { +namespace platform { +class ErrorSummary; +} // namespace platform +} // namespace paddle + DECLARE_int32(call_stack_level); namespace paddle { diff --git a/paddle/fluid/platform/errors_test.cc b/paddle/fluid/platform/errors_test.cc index 3c84215b5e5..a73c1ba3d34 100644 --- a/paddle/fluid/platform/errors_test.cc +++ b/paddle/fluid/platform/errors_test.cc @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include #include #include "gtest/gtest.h" diff --git a/paddle/fluid/platform/float16_test.cc b/paddle/fluid/platform/float16_test.cc index 261ec68483f..ec8a98eeb1a 100644 --- a/paddle/fluid/platform/float16_test.cc +++ b/paddle/fluid/platform/float16_test.cc @@ -10,13 +10,11 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/platform/float16.h" -#include - #define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h +#include #include "gtest/gtest.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/init.h" namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/lodtensor_printer.cc b/paddle/fluid/platform/lodtensor_printer.cc index 33d0fe62680..0be4233269e 100644 --- a/paddle/fluid/platform/lodtensor_printer.cc +++ b/paddle/fluid/platform/lodtensor_printer.cc @@ -13,9 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/lodtensor_printer.h" -#include "paddle/fluid/framework/lod_tensor_array.h" + #include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/framework/variable.h" + +namespace paddle { +namespace framework { +class LoDTensor; +class Variable; +} // namespace framework +} // namespace paddle namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/lodtensor_printer.h b/paddle/fluid/platform/lodtensor_printer.h index e070e3540c9..e0bd1fff197 100644 --- a/paddle/fluid/platform/lodtensor_printer.h +++ b/paddle/fluid/platform/lodtensor_printer.h @@ -14,8 +14,15 @@ limitations under the License. 
*/ #pragma once #include + #include "paddle/fluid/framework/scope.h" +namespace paddle { +namespace framework { +class Scope; +} // namespace framework +} // namespace paddle + namespace paddle { namespace platform { void PrintVar(framework::Scope* scope, const std::string& var_name, diff --git a/paddle/fluid/platform/lodtensor_printer_test.cc b/paddle/fluid/platform/lodtensor_printer_test.cc index 19e85284b8f..5b2af270740 100644 --- a/paddle/fluid/platform/lodtensor_printer_test.cc +++ b/paddle/fluid/platform/lodtensor_printer_test.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/fluid/platform/lodtensor_printer.h" + #include "gtest/gtest.h" #include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/framework/variable.h" TEST(LodTensorPrinter, PrintVar) { paddle::framework::Scope scope; diff --git a/paddle/fluid/platform/monitor.cc b/paddle/fluid/platform/monitor.cc index e65e09f45c0..76554012bf5 100644 --- a/paddle/fluid/platform/monitor.cc +++ b/paddle/fluid/platform/monitor.cc @@ -13,7 +13,6 @@ // limitations under the License. #include "paddle/fluid/platform/monitor.h" -#include namespace paddle { namespace platform {} // namespace platform diff --git a/paddle/fluid/platform/monitor.h b/paddle/fluid/platform/monitor.h index 2f186420b41..b57fae9daac 100644 --- a/paddle/fluid/platform/monitor.h +++ b/paddle/fluid/platform/monitor.h @@ -22,6 +22,7 @@ #include #include #include + #include "glog/logging.h" namespace paddle { diff --git a/paddle/fluid/platform/place_test.cc b/paddle/fluid/platform/place_test.cc index 13f28c73f45..41e084efa57 100644 --- a/paddle/fluid/platform/place_test.cc +++ b/paddle/fluid/platform/place_test.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
#include "paddle/fluid/platform/place.h" -#include + #include "gtest/gtest.h" TEST(Place, Equality) { diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc index 85759bc6e2e..56a6275b582 100644 --- a/paddle/fluid/platform/profiler.cc +++ b/paddle/fluid/platform/profiler.cc @@ -12,28 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include -#include #include -#include #include // NOLINT #include -#include #include -#include -#ifdef PADDLE_WITH_CUDA -#include -#endif // PADDLE_WITH_CUDA -#include "glog/logging.h" -#include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/platform/device_tracer.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/errors.h" -#include "paddle/fluid/platform/port.h" #include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler_helper.h" -#include "paddle/fluid/string/printf.h" DEFINE_bool(enable_rpc_profiler, false, "Enable rpc profiler or not."); diff --git a/paddle/fluid/platform/profiler.h b/paddle/fluid/platform/profiler.h index 12049d815cf..0185328ff32 100644 --- a/paddle/fluid/platform/profiler.h +++ b/paddle/fluid/platform/profiler.h @@ -23,6 +23,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/type_defs.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/event.h" diff --git a/paddle/fluid/platform/profiler_test.cc b/paddle/fluid/platform/profiler_test.cc index 4d340a06342..2ce898d4617 100644 --- a/paddle/fluid/platform/profiler_test.cc +++ b/paddle/fluid/platform/profiler_test.cc @@ -13,10 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/platform/profiler.h" + #include -#ifdef PADDLE_WITH_CUDA -#include -#endif + #include "gtest/gtest.h" TEST(Event, CpuElapsedTime) { diff --git a/paddle/fluid/platform/stream/cuda_stream.h b/paddle/fluid/platform/stream/cuda_stream.h index 4272d5fd0b1..c65d107cf45 100644 --- a/paddle/fluid/platform/stream/cuda_stream.h +++ b/paddle/fluid/platform/stream/cuda_stream.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/platform/gpu_info.h" #include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/platform/timer.h b/paddle/fluid/platform/timer.h index ff0e1d95c29..09dcc4369be 100644 --- a/paddle/fluid/platform/timer.h +++ b/paddle/fluid/platform/timer.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/platform/port.h" #ifdef _WIN32 diff --git a/paddle/fluid/string/piece_test.cc b/paddle/fluid/string/piece_test.cc index 80b712b08cc..544b5985ed2 100644 --- a/paddle/fluid/string/piece_test.cc +++ b/paddle/fluid/string/piece_test.cc @@ -14,8 +14,6 @@ #include "paddle/fluid/string/piece.h" -#include - #include "gtest/gtest.h" TEST(StringPiece, Construct) { diff --git a/paddle/fluid/string/pretty_log.h b/paddle/fluid/string/pretty_log.h index da4c1f326fb..5e2aedb22ad 100644 --- a/paddle/fluid/string/pretty_log.h +++ b/paddle/fluid/string/pretty_log.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/string/printf.h" DECLARE_bool(color); diff --git a/paddle/fluid/string/string_helper.cc b/paddle/fluid/string/string_helper.cc index 712db90d2f4..8731e8fca8a 100644 --- a/paddle/fluid/string/string_helper.cc +++ b/paddle/fluid/string/string_helper.cc @@ -13,12 +13,12 @@ // limitations under the License. 
#include "paddle/fluid/string/string_helper.h" + #include #include #include #include -#include -#include "boost/lexical_cast.hpp" + #include "glog/logging.h" namespace paddle { diff --git a/paddle/fluid/string/string_helper.h b/paddle/fluid/string/string_helper.h index 8bf379a6b34..499539226bd 100644 --- a/paddle/fluid/string/string_helper.h +++ b/paddle/fluid/string/string_helper.h @@ -20,6 +20,7 @@ #include #include #include + #include "boost/lexical_cast.hpp" #include "glog/logging.h" diff --git a/paddle/testing/paddle_gtest_main.cc b/paddle/testing/paddle_gtest_main.cc index c19bd56fbbf..5400c55a0b1 100644 --- a/paddle/testing/paddle_gtest_main.cc +++ b/paddle/testing/paddle_gtest_main.cc @@ -12,12 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include - #include "gflags/gflags.h" #include "gtest/gtest.h" #include "paddle/fluid/memory/allocation/allocator_strategy.h" -#include "paddle/fluid/memory/memory.h" #include "paddle/fluid/platform/init.h" int main(int argc, char** argv) { -- GitLab From 081fb2f96342efa2ff273019b12aacbf83a33284 Mon Sep 17 00:00:00 2001 From: Guo Sheng Date: Thu, 24 Sep 2020 13:50:41 +0800 Subject: [PATCH 067/117] Remove dependency on nltk for paddle __init__. (#27388) * Remove dependency on nltk for paddle __init__. test=develop * Remove nltk.movie_reivew sentiment dataset to remove dependency on nltk. 
test=develop --- python/paddle/dataset/__init__.py | 2 - python/paddle/dataset/sentiment.py | 150 --------------- python/paddle/dataset/tests/test_sentiment.py | 58 ------ .../tests/unittests/test_dataset_sentiment.py | 42 ----- .../tests/test_dataset_movie_reviews.py | 50 ----- python/paddle/text/datasets/__init__.py | 3 - python/paddle/text/datasets/movie_reviews.py | 173 ------------------ 7 files changed, 478 deletions(-) delete mode 100644 python/paddle/dataset/sentiment.py delete mode 100644 python/paddle/dataset/tests/test_sentiment.py delete mode 100644 python/paddle/fluid/tests/unittests/test_dataset_sentiment.py delete mode 100644 python/paddle/tests/test_dataset_movie_reviews.py delete mode 100644 python/paddle/text/datasets/movie_reviews.py diff --git a/python/paddle/dataset/__init__.py b/python/paddle/dataset/__init__.py index 54aa3edc51d..d1e59758565 100644 --- a/python/paddle/dataset/__init__.py +++ b/python/paddle/dataset/__init__.py @@ -22,7 +22,6 @@ import paddle.dataset.cifar import paddle.dataset.movielens import paddle.dataset.conll05 import paddle.dataset.uci_housing -import paddle.dataset.sentiment import paddle.dataset.wmt14 import paddle.dataset.wmt16 import paddle.dataset.mq2007 @@ -37,7 +36,6 @@ __all__ = [ 'cifar', 'movielens', 'conll05', - 'sentiment', 'uci_housing', 'wmt14', 'wmt16', diff --git a/python/paddle/dataset/sentiment.py b/python/paddle/dataset/sentiment.py deleted file mode 100644 index 721cb5a8192..00000000000 --- a/python/paddle/dataset/sentiment.py +++ /dev/null @@ -1,150 +0,0 @@ -# /usr/bin/env python -# -*- coding:utf-8 -*- - -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -The script fetch and preprocess movie_reviews data set that provided by NLTK - -TODO(yuyang18): Complete dataset. -""" - -from __future__ import print_function - -import six -import collections -from itertools import chain - -import os -import nltk -from nltk.corpus import movie_reviews -import zipfile -from functools import cmp_to_key - -import paddle.dataset.common - -URL = "https://corpora.bj.bcebos.com/movie_reviews%2Fmovie_reviews.zip" -MD5 = '155de2b77c6834dd8eea7cbe88e93acb' - -__all__ = ['train', 'test', 'get_word_dict'] -NUM_TRAINING_INSTANCES = 1600 -NUM_TOTAL_INSTANCES = 2000 - - -def download_data_if_not_yet(): - """ - Download the data set, if the data set is not download. 
- """ - try: - # download and extract movie_reviews.zip - paddle.dataset.common.download( - URL, 'corpora', md5sum=MD5, save_name='movie_reviews.zip') - path = os.path.join(paddle.dataset.common.DATA_HOME, 'corpora') - filename = os.path.join(path, 'movie_reviews.zip') - zip_file = zipfile.ZipFile(filename) - zip_file.extractall(path) - zip_file.close() - # make sure that nltk can find the data - if paddle.dataset.common.DATA_HOME not in nltk.data.path: - nltk.data.path.append(paddle.dataset.common.DATA_HOME) - movie_reviews.categories() - except LookupError: - print("Downloading movie_reviews data set, please wait.....") - nltk.download( - 'movie_reviews', download_dir=paddle.dataset.common.DATA_HOME) - print("Download data set success.....") - print("Path is " + nltk.data.find('corpora/movie_reviews').path) - - -def get_word_dict(): - """ - Sorted the words by the frequency of words which occur in sample - :return: - words_freq_sorted - """ - words_freq_sorted = list() - word_freq_dict = collections.defaultdict(int) - download_data_if_not_yet() - - for category in movie_reviews.categories(): - for field in movie_reviews.fileids(category): - for words in movie_reviews.words(field): - word_freq_dict[words] += 1 - words_sort_list = list(six.iteritems(word_freq_dict)) - words_sort_list.sort(key=cmp_to_key(lambda a, b: b[1] - a[1])) - for index, word in enumerate(words_sort_list): - words_freq_sorted.append((word[0], index)) - return words_freq_sorted - - -def sort_files(): - """ - Sorted the sample for cross reading the sample - :return: - files_list - """ - files_list = list() - neg_file_list = movie_reviews.fileids('neg') - pos_file_list = movie_reviews.fileids('pos') - files_list = list( - chain.from_iterable(list(zip(neg_file_list, pos_file_list)))) - return files_list - - -def load_sentiment_data(): - """ - Load the data set - :return: - data_set - """ - data_set = list() - download_data_if_not_yet() - words_ids = dict(get_word_dict()) - for sample_file in 
sort_files(): - words_list = list() - category = 0 if 'neg' in sample_file else 1 - for word in movie_reviews.words(sample_file): - words_list.append(words_ids[word.lower()]) - data_set.append((words_list, category)) - return data_set - - -def reader_creator(data): - """ - Reader creator, generate an iterator for data set - :param data: - train data set or test data set - """ - for each in data: - yield each[0], each[1] - - -def train(): - """ - Default training set reader creator - """ - data_set = load_sentiment_data() - return reader_creator(data_set[0:NUM_TRAINING_INSTANCES]) - - -def test(): - """ - Default test set reader creator - """ - data_set = load_sentiment_data() - return reader_creator(data_set[NUM_TRAINING_INSTANCES:]) - - -def fetch(): - nltk.download('movie_reviews', download_dir=paddle.dataset.common.DATA_HOME) diff --git a/python/paddle/dataset/tests/test_sentiment.py b/python/paddle/dataset/tests/test_sentiment.py deleted file mode 100644 index 3540ea06b07..00000000000 --- a/python/paddle/dataset/tests/test_sentiment.py +++ /dev/null @@ -1,58 +0,0 @@ -# /usr/bin/env python -# -*- coding:utf-8 -*- - -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import unittest -import nltk -import paddle.dataset.sentiment as st -from nltk.corpus import movie_reviews - - -class TestSentimentMethods(unittest.TestCase): - def test_get_word_dict(self): - word_dict = st.get_word_dict()[0:10] - test_word_list = [(',', 0), ('the', 1), ('.', 2), ('a', 3), ('and', 4), - ('of', 5), ('to', 6), ("'", 7), ('is', 8), ('in', 9)] - for idx, each in enumerate(word_dict): - self.assertEqual(each, test_word_list[idx]) - self.assertTrue("/root/.cache/paddle/dataset" in nltk.data.path) - - def test_sort_files(self): - last_label = '' - for sample_file in st.sort_files(): - current_label = sample_file.split("/")[0] - self.assertNotEqual(current_label, last_label) - last_label = current_label - - def test_data_set(self): - data_set = st.load_sentiment_data() - last_label = -1 - - for each in st.test(): - self.assertNotEqual(each[1], last_label) - last_label = each[1] - - self.assertEqual(len(data_set), st.NUM_TOTAL_INSTANCES) - self.assertEqual(len(list(st.train())), st.NUM_TRAINING_INSTANCES) - self.assertEqual( - len(list(st.test())), - (st.NUM_TOTAL_INSTANCES - st.NUM_TRAINING_INSTANCES)) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dataset_sentiment.py b/python/paddle/fluid/tests/unittests/test_dataset_sentiment.py deleted file mode 100644 index b5d5d33fa3f..00000000000 --- a/python/paddle/fluid/tests/unittests/test_dataset_sentiment.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -TestCases for Dataset, -including create, config, run, etc. -""" - -from __future__ import print_function -import numpy as np -import unittest -import os -import paddle -import zipfile -import paddle.dataset.common - -URL = "https://corpora.bj.bcebos.com/movie_reviews%2Fmovie_reviews.zip" -MD5 = '155de2b77c6834dd8eea7cbe88e93acb' - - -class TestDatasetSentiment(unittest.TestCase): - """ TestCases for Sentiment. """ - - def test_get_word_dict(self): - """ Testcase for get_word_dict. """ - words_freq_sorted = paddle.dataset.sentiment.get_word_dict() - print(words_freq_sorted) - self.assertTrue(len(words_freq_sorted) == 39768) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/tests/test_dataset_movie_reviews.py b/python/paddle/tests/test_dataset_movie_reviews.py deleted file mode 100644 index e6e6667013f..00000000000 --- a/python/paddle/tests/test_dataset_movie_reviews.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest -import numpy as np - -from paddle.text.datasets import * - - -class TestMovieReviewsTrain(unittest.TestCase): - def test_main(self): - movie_reviews = MovieReviews(mode='train') - self.assertTrue(len(movie_reviews) == 1600) - - # traversal whole dataset may cost a - # long time, randomly check 1 sample - idx = np.random.randint(0, 1600) - data = movie_reviews[idx] - self.assertTrue(len(data) == 2) - self.assertTrue(len(data[0].shape) == 1) - self.assertTrue(int(data[1]) in [0, 1]) - - -class TestMovieReviewsTest(unittest.TestCase): - def test_main(self): - movie_reviews = MovieReviews(mode='test') - self.assertTrue(len(movie_reviews) == 400) - - # traversal whole dataset may cost a - # long time, randomly check 1 sample - idx = np.random.randint(0, 400) - data = movie_reviews[idx] - self.assertTrue(len(data) == 2) - self.assertTrue(len(data[0].shape) == 1) - self.assertTrue(int(data[1]) in [0, 1]) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/text/datasets/__init__.py b/python/paddle/text/datasets/__init__.py index b5cea40a4f4..71571d09b5c 100644 --- a/python/paddle/text/datasets/__init__.py +++ b/python/paddle/text/datasets/__init__.py @@ -16,7 +16,6 @@ from . import conll05 from . import imdb from . import imikolov from . import movielens -from . import movie_reviews from . import uci_housing from . import wmt14 from . 
import wmt16 @@ -25,7 +24,6 @@ from .conll05 import * from .imdb import * from .imikolov import * from .movielens import * -from .movie_reviews import * from .uci_housing import * from .wmt14 import * from .wmt16 import * @@ -34,7 +32,6 @@ __all__ = conll05.__all__ \ + imdb.__all__ \ + imikolov.__all__ \ + movielens.__all__ \ - + movie_reviews.__all__ \ + uci_housing.__all__ \ + wmt14.__all__ \ + wmt16.__all__ diff --git a/python/paddle/text/datasets/movie_reviews.py b/python/paddle/text/datasets/movie_reviews.py deleted file mode 100644 index db5b15654f9..00000000000 --- a/python/paddle/text/datasets/movie_reviews.py +++ /dev/null @@ -1,173 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import os -import six -import numpy as np -import collections -import nltk -from nltk.corpus import movie_reviews -import zipfile -from functools import cmp_to_key -from itertools import chain - -import paddle -from paddle.io import Dataset - -__all__ = ['MovieReviews'] - -URL = "https://corpora.bj.bcebos.com/movie_reviews%2Fmovie_reviews.zip" -MD5 = '155de2b77c6834dd8eea7cbe88e93acb' - -NUM_TRAINING_INSTANCES = 1600 -NUM_TOTAL_INSTANCES = 2000 - - -class MovieReviews(Dataset): - """ - Implementation of `NLTK movie reviews `_ dataset. - - Args: - data_file(str): path to data tar file, can be set None if - :attr:`download` is True. 
Default None - mode(str): 'train' 'test' mode. Default 'train'. - download(bool): whether auto download cifar dataset if - :attr:`data_file` unset. Default True. - - Returns: - Dataset: instance of movie reviews dataset - - Examples: - - .. code-block:: python - - import paddle - from paddle.text.datasets import MovieReviews - - class SimpleNet(paddle.nn.Layer): - def __init__(self): - super(SimpleNet, self).__init__() - - def forward(self, word, category): - return paddle.sum(word), category - - paddle.disable_static() - - movie_reviews = MovieReviews(mode='train') - - for i in range(10): - word_list, category = movie_reviews[i] - word_list = paddle.to_tensor(word_list) - category = paddle.to_tensor(category) - - model = SimpleNet() - word_list, category = model(word_list, category) - print(word_list.numpy().shape, category.numpy()) - - """ - - def __init__(self, mode='train'): - assert mode.lower() in ['train', 'test'], \ - "mode should be 'train', 'test', but got {}".format(mode) - self.mode = mode.lower() - - self._download_data_if_not_yet() - - # read dataset into memory - self._load_sentiment_data() - - def _get_word_dict(self): - """ - Sorted the words by the frequency of words which occur in sample - :return: - words_freq_sorted - """ - words_freq_sorted = list() - word_freq_dict = collections.defaultdict(int) - - for category in movie_reviews.categories(): - for field in movie_reviews.fileids(category): - for words in movie_reviews.words(field): - word_freq_dict[words] += 1 - words_sort_list = list(six.iteritems(word_freq_dict)) - words_sort_list.sort(key=cmp_to_key(lambda a, b: b[1] - a[1])) - for index, word in enumerate(words_sort_list): - words_freq_sorted.append((word[0], index)) - return words_freq_sorted - - def _sort_files(self): - """ - Sorted the sample for cross reading the sample - :return: - files_list - """ - files_list = list() - neg_file_list = movie_reviews.fileids('neg') - pos_file_list = movie_reviews.fileids('pos') - files_list = list( 
- chain.from_iterable(list(zip(neg_file_list, pos_file_list)))) - return files_list - - def _load_sentiment_data(self): - """ - Load the data set - :return: - data_set - """ - self.data = [] - words_ids = dict(self._get_word_dict()) - for sample_file in self._sort_files(): - words_list = list() - category = 0 if 'neg' in sample_file else 1 - for word in movie_reviews.words(sample_file): - words_list.append(words_ids[word.lower()]) - self.data.append((words_list, category)) - - def _download_data_if_not_yet(self): - """ - Download the data set, if the data set is not download. - """ - try: - # download and extract movie_reviews.zip - paddle.dataset.common.download( - URL, 'corpora', md5sum=MD5, save_name='movie_reviews.zip') - path = os.path.join(paddle.dataset.common.DATA_HOME, 'corpora') - filename = os.path.join(path, 'movie_reviews.zip') - zip_file = zipfile.ZipFile(filename) - zip_file.extractall(path) - zip_file.close() - # make sure that nltk can find the data - if paddle.dataset.common.DATA_HOME not in nltk.data.path: - nltk.data.path.append(paddle.dataset.common.DATA_HOME) - movie_reviews.categories() - except LookupError: - print("Downloading movie_reviews data set, please wait.....") - nltk.download( - 'movie_reviews', download_dir=paddle.dataset.common.DATA_HOME) - print("Download data set success.....") - print("Path is " + nltk.data.find('corpora/movie_reviews').path) - - def __getitem__(self, idx): - if self.mode == 'test': - idx += NUM_TRAINING_INSTANCES - data = self.data[idx] - return np.array(data[0]), np.array(data[1]) - - def __len__(self): - if self.mode == 'train': - return NUM_TRAINING_INSTANCES - else: - return NUM_TOTAL_INSTANCES - NUM_TRAINING_INSTANCES -- GitLab From 8f7bb52bd2e0cdf23c3441f3ec85b733a0f900f0 Mon Sep 17 00:00:00 2001 From: Shibo Tao <62922815+T8T9@users.noreply.github.com> Date: Thu, 24 Sep 2020 13:52:48 +0800 Subject: [PATCH 068/117] fix tensorrt 6 build error. test=develop (#27511) * fix tensorrt 6 build error. 
test=develop * fix. test=develop * bug fix * test=develop --- paddle/fluid/platform/dynload/tensorrt.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/platform/dynload/tensorrt.h b/paddle/fluid/platform/dynload/tensorrt.h index 67a79ce4bb1..cb751071062 100644 --- a/paddle/fluid/platform/dynload/tensorrt.h +++ b/paddle/fluid/platform/dynload/tensorrt.h @@ -35,7 +35,7 @@ extern void* tensorrt_dso_handle; #define DECLARE_DYNAMIC_LOAD_TENSORRT_WRAP(__name) \ struct DynLoad__##__name { \ template \ - auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \ + void* operator()(Args... args) { \ std::call_once(tensorrt_dso_flag, []() { \ tensorrt_dso_handle = paddle::platform::dynload::GetTensorRtHandle(); \ }); \ @@ -44,7 +44,8 @@ extern void* tensorrt_dso_handle; return nullptr; \ } \ using tensorrt_func = decltype(&::__name); \ - return reinterpret_cast(p_##__name)(args...); \ + auto ret = reinterpret_cast(p_##__name)(args...); \ + return static_cast(ret); \ } \ }; \ extern DynLoad__##__name __name -- GitLab From f91c37e6655473066261b7e1d248844d9eaabb84 Mon Sep 17 00:00:00 2001 From: Aurelius84 Date: Thu, 24 Sep 2020 14:04:32 +0800 Subject: [PATCH 069/117] Refine error message of MatchMatrix and PyramidHash (#27484) --- .../fluid/operators/match_matrix_tensor_op.cc | 128 ++++++++++++------ paddle/fluid/operators/pyramid_hash_op.cc | 16 ++- 2 files changed, 101 insertions(+), 43 deletions(-) diff --git a/paddle/fluid/operators/match_matrix_tensor_op.cc b/paddle/fluid/operators/match_matrix_tensor_op.cc index 8bad10d7235..e95aef8eb56 100644 --- a/paddle/fluid/operators/match_matrix_tensor_op.cc +++ b/paddle/fluid/operators/match_matrix_tensor_op.cc @@ -28,34 +28,54 @@ using LoDTensor = framework::LoDTensor; using LoD = framework::LoD; void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - "X(Input) of MatchMatrix should not be null."); - 
PADDLE_ENFORCE_EQ(ctx->HasInput("Y"), true, - "Y(Input) of MatchMatrix should not be null."); - PADDLE_ENFORCE_EQ(ctx->HasInput("W"), true, - "W(Input) of MatchMatrix should not be null."); - PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, - "Out(Output) of MatchMatrix should not be null."); - PADDLE_ENFORCE_EQ(ctx->HasOutput("Tmp"), true, - "Tmp(Output) of MatchMatrix should not be null."); + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "match_matrix_tensor"); + OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "match_matrix_tensor"); + OP_INOUT_CHECK(ctx->HasInput("W"), "Input", "W", "match_matrix_tensor"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "match_matrix_tensor"); + OP_INOUT_CHECK(ctx->HasOutput("Tmp"), "Output", "Tmp", "match_matrix_tensor"); auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ(x_dims.size(), 2, - "The rank of Input(X) can't be less than 2."); + platform::errors::InvalidArgument( + "The dimensions of Input(X) should be equal to 2, " + "but received %d.", + x_dims.size())); auto y_dims = ctx->GetInputDim("Y"); PADDLE_ENFORCE_EQ(y_dims.size(), 2, - "The rank of Input(Y) can't be less than 2."); + platform::errors::InvalidArgument( + "The dimensions of Input(Y) should be equal to 2, " + "but received %d.", + y_dims.size())); auto w_dims = ctx->GetInputDim("W"); - PADDLE_ENFORCE_EQ(w_dims.size(), 3UL, "W should be 3-D tensor"); + PADDLE_ENFORCE_EQ(w_dims.size(), 3, + platform::errors::InvalidArgument( + "The dimensions of Input(W) should be equal to 3, " + "but received %d.", + w_dims.size())); int dim_t = ctx->Attrs().Get("dim_t"); - PADDLE_ENFORCE_EQ(w_dims[0], x_dims[1], - "W 's shape must satisfy: W[0] = X[1]"); - PADDLE_ENFORCE_EQ(w_dims[1], dim_t, "W 's shape must satisfy: W[1] = dim_t"); - PADDLE_ENFORCE_EQ(w_dims[2], y_dims[1], - "W 's shape must satisfy: W[2] = Y[1]"); + PADDLE_ENFORCE_EQ( + w_dims[0], x_dims[1], + platform::errors::InvalidArgument( + "The first dimension of Input(W) should be equal to the second 
" + "dimension of Input(X). But received the first dimension of Input(W) " + "is %d, the second dimension of Input(X) is %d.", + w_dims[0], x_dims[1])); + PADDLE_ENFORCE_EQ( + w_dims[1], dim_t, + platform::errors::InvalidArgument( + "The second dimension of Input(W) should be equal to 'dim_t', but " + "received the second dimension of Input(W) is %d, 'dim_t' is %d.", + w_dims[1], dim_t)); + PADDLE_ENFORCE_EQ( + w_dims[2], y_dims[1], + platform::errors::InvalidArgument( + "The last dimension of Input(W) should be equal to " + "the second dimension of Input(Y). But received the last dimension " + "of Input(W) is %d, the second dimension of Input(Y) is %d.", + w_dims[2], y_dims[1])); int64_t out_dim_0 = -1; int64_t tmp_dim_0 = -1; @@ -63,27 +83,52 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { framework::Variable* x_var = BOOST_GET(framework::Variable*, ctx->GetInputVarPtrs("X")[0]); const auto& x_lod = x_var->Get().lod(); - PADDLE_ENFORCE_EQ(x_lod.empty(), false, "The Input(X) must hold lod info."); + PADDLE_ENFORCE_EQ(x_lod.empty(), false, + platform::errors::InvalidArgument( + "The Input(X) should hold LoD information, but " + "received Input(X).lod() is empty.")); const auto& x_lod_0 = x_lod[0]; PADDLE_ENFORCE_GE(x_lod_0.size(), 2, - "The Input(X)'s lod info is corrupted."); - PADDLE_ENFORCE_EQ( - x_dims[0], static_cast(x_lod_0.back()), - "The Input(X)'s lod info mismatches the actual tensor shape."); + platform::errors::InvalidArgument( + "The dimensions of Input(X)'s LoD data should be " + "equal to 2, but received %d.", + x_lod_0.size())); + PADDLE_ENFORCE_EQ(x_dims[0], static_cast(x_lod_0.back()), + platform::errors::InvalidArgument( + "The last element of Input(X)'s LoD data should be " + "equal to the first dimension of Input(X). 
" + "But received the last element of Input(X)'s LoD " + "data is %d, the first dimension of Input(X) is %d.", + x_lod_0.back(), x_dims[0])); framework::Variable* y_var = BOOST_GET(framework::Variable*, ctx->GetInputVarPtrs("Y")[0]); const auto& y_lod = y_var->Get().lod(); - PADDLE_ENFORCE_EQ(y_lod.empty(), false, "The Input(Y) must hold lod info."); + PADDLE_ENFORCE_EQ(y_lod.empty(), false, + platform::errors::InvalidArgument( + "The Input(Y) should hold LoD information, but " + "received Input(Y).lod() is empty.")); const auto& y_lod_0 = y_lod[0]; PADDLE_ENFORCE_GE(y_lod_0.size(), 2, - "The Input(Y)'s lod info is corrupted."); - PADDLE_ENFORCE_EQ( - y_dims[0], static_cast(y_lod_0.back()), - "The Input(Y)'s lod info mismatches the actual tensor shape."); + platform::errors::InvalidArgument( + "The dimensions of Input(Y)'s LoD data should be " + "equal to 2, but received %d.", + y_lod_0.size())); + PADDLE_ENFORCE_EQ(y_dims[0], static_cast(y_lod_0.back()), + platform::errors::InvalidArgument( + "The last element of Input(Y)'s LoD data should be " + "equal to the first dimension of Input(Y). " + "But received the last element of Input(Y)'s LoD " + "data is %d, the first dimension of Input(Y) is %d.", + y_lod_0.back(), y_dims[0])); PADDLE_ENFORCE_EQ(x_lod_0.size(), y_lod_0.size(), - "The Length of X and Y must be equal."); + platform::errors::InvalidArgument( + "The dimensions of Input(X)'s and Input(Y)'s LoD " + "data should be equal. 
" + "But received the dimensions of Input(X)'s LoD is " + "%d, the dimensions of Input(Y)'s LoD is %d.", + x_lod_0.size(), y_lod_0.size())); out_dim_0 = 0; for (size_t i = 1; i < x_lod_0.size(); i++) { @@ -98,10 +143,18 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { // compile time framework::VarDesc* x_desc = BOOST_GET(framework::VarDesc*, ctx->GetInputVarPtrs("X")[0]); - PADDLE_ENFORCE_GE(x_desc->GetLoDLevel(), 1); + PADDLE_ENFORCE_GE( + x_desc->GetLoDLevel(), 1, + platform::errors::InvalidArgument("The LoD level of Input(X) should be " + "greater than 1, but reviced %d.", + x_desc->GetLoDLevel())); framework::VarDesc* y_desc = BOOST_GET(framework::VarDesc*, ctx->GetInputVarPtrs("Y")[0]); - PADDLE_ENFORCE_GE(y_desc->GetLoDLevel(), 1); + PADDLE_ENFORCE_GE( + y_desc->GetLoDLevel(), 1, + platform::errors::InvalidArgument("The LoD level of Input(Y) should be " + "greater than 1, but reviced %d.", + y_desc->GetLoDLevel())); ctx->ShareLoD("X", "Out"); } @@ -115,14 +168,11 @@ void MatchMatrixTensorOP::InferShape(framework::InferShapeContext* ctx) const { void MatchMatrixTensorOpGrad::InferShape( framework::InferShapeContext* ctx) const { - PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, - "Input(X) of SequencePadGradOp should not be null."); - PADDLE_ENFORCE_EQ(ctx->HasInput("Y"), true, - "Input(Y) of SequencePadGradOp should not be null."); - PADDLE_ENFORCE_EQ(ctx->HasInput("W"), true, - "Input(W) of SequencePadGradOp should not be null."); - PADDLE_ENFORCE_EQ(ctx->HasInput(framework::GradVarName("Out")), true, - "Input(Out@GRAD) of SequencePadGradOp should not be null."); + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "match_matrix_tensor_grad"); + OP_INOUT_CHECK(ctx->HasInput("Y"), "Input", "Y", "match_matrix_tensor_grad"); + OP_INOUT_CHECK(ctx->HasInput("W"), "Input", "W", "match_matrix_tensor_grad"); + OP_INOUT_CHECK(ctx->HasInput(framework::GradVarName("Out")), "Input", + "Out@GRAD", "match_matrix_tensor_grad"); if 
(ctx->HasOutput(framework::GradVarName("X"))) { ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X")); diff --git a/paddle/fluid/operators/pyramid_hash_op.cc b/paddle/fluid/operators/pyramid_hash_op.cc index 0cae060bc8e..c50a8b731d5 100644 --- a/paddle/fluid/operators/pyramid_hash_op.cc +++ b/paddle/fluid/operators/pyramid_hash_op.cc @@ -285,13 +285,21 @@ class CPUPyramidHashOPKernel : public framework::OpKernel { if (use_filter) { if (white_list_len != 0) { _filter = (math::bloomfilter*)_blobs_1->data(); - PADDLE_ENFORCE_EQ(math::bloomfilter_check(_filter), 1, - "white filter not load"); + PADDLE_ENFORCE_EQ( + math::bloomfilter_check(_filter), 1, + platform::errors::PreconditionNotMet( + "The white filter is not loaded successfully, please make sure " + "'white_list_len': %d is valid for Input(WhiteList).", + white_list_len)); } if (black_list_len != 0) { _black_filter = (math::bloomfilter*)_blobs_2->data(); - PADDLE_ENFORCE_EQ(math::bloomfilter_check(_black_filter), 1, - "black filter not load"); + PADDLE_ENFORCE_EQ( + math::bloomfilter_check(_black_filter), 1, + platform::errors::PreconditionNotMet( + "The black filter is not loaded successfully, please make sure " + "'black_list_len': %d is valid for Input(BlackList).", + black_list_len)); } } -- GitLab From 42363674010d9c8f29135dcecf316aa70004ac77 Mon Sep 17 00:00:00 2001 From: tianshuo78520a <707759223@qq.com> Date: Thu, 24 Sep 2020 14:15:16 +0800 Subject: [PATCH 070/117] fix approveals message (#27531) * fix approveals message;test=document_fix * fix approveals message;test=document_fix --- tools/check_api_approvals.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/check_api_approvals.sh b/tools/check_api_approvals.sh index 943b8c01e8c..1db3f6d3d27 100644 --- a/tools/check_api_approvals.sh +++ b/tools/check_api_approvals.sh @@ -39,8 +39,8 @@ fi api_spec_diff=`python ${PADDLE_ROOT}/tools/check_api_source_without_core_ops.py 
${PADDLE_ROOT}/paddle/fluid/API_DEV.source.md5 ${PADDLE_ROOT}/paddle/fluid/API_PR.source.md5` if [ "$api_spec_diff" != "" ]; then - echo_line="${echo_line}Related APIs: ${api_spec_diff}\n" - echo_line="You must have one RD (zhiqiu (Recommend) or phlrain) approval for the api change for the opreator-related api without 'core.ops'.\n" + echo_line="APIs without core.ops: \n${api_spec_diff}\n" + echo_line="${echo_line}You must have one RD (zhiqiu (Recommend) or phlrain) approval for the api change for the opreator-related api without 'core.ops'.\n" echo_line="${echo_line}For more details, please click [https://github.com/PaddlePaddle/Paddle/wiki/paddle_api_development_manual.md]\n" check_approval 1 6888866 43953930 fi -- GitLab From e1fb77d123898e3d1d0a6e42e68e9c91dbcda746 Mon Sep 17 00:00:00 2001 From: ruri Date: Thu, 24 Sep 2020 14:25:40 +0800 Subject: [PATCH 071/117] [2.0RC]refine error message in shuffle channel OP (#27505) * refine err msg in shuffle channel op --- paddle/fluid/operators/shuffle_channel_op.cc | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/operators/shuffle_channel_op.cc b/paddle/fluid/operators/shuffle_channel_op.cc index c01fed108f3..119d2e72369 100644 --- a/paddle/fluid/operators/shuffle_channel_op.cc +++ b/paddle/fluid/operators/shuffle_channel_op.cc @@ -21,13 +21,13 @@ class ShuffleChannelOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), - "Input(X) of ShuffleChannelOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Out"), - "Output(Out) of ShuffleChannelOp should not be null."); + OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "ShuffleChannelOp"); + OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "ShuffleChannelOp"); auto input_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE(input_dims.size() == 4, "The layout of input is 
NCHW."); + PADDLE_ENFORCE_EQ( + input_dims.size(), 4, + platform::errors::InvalidArgument("The layout of input is NCHW.")); ctx->SetOutputDim("Out", input_dims); } @@ -53,7 +53,8 @@ class ShuffleChannelOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr("group", "the number of groups.") .SetDefault(1) .AddCustomChecker([](const int& group) { - PADDLE_ENFORCE_GE(group, 1, "group should be larger than 0."); + PADDLE_ENFORCE_GE(group, 1, platform::errors::InvalidArgument( + "group should be larger than 0.")); }); AddComment(R"DOC( @@ -76,7 +77,9 @@ class ShuffleChannelGradOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext* ctx) const override { auto input_dims = ctx->GetInputDim(framework::GradVarName("Out")); - PADDLE_ENFORCE(input_dims.size() == 4, "The layout of input is NCHW."); + PADDLE_ENFORCE_EQ( + input_dims.size(), 4, + platform::errors::InvalidArgument("The layout of input is NCHW.")); ctx->SetOutputDim(framework::GradVarName("X"), input_dims); } -- GitLab From b6ecf35627d80e18470723a8d43c11826aabd6c8 Mon Sep 17 00:00:00 2001 From: YUNSHEN XIE <1084314248@qq.com> Date: Thu, 24 Sep 2020 14:40:55 +0800 Subject: [PATCH 072/117] disable test_paddle_save_load,test=document_fix (#27534) --- python/paddle/fluid/tests/unittests/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 8d236dca22f..94bc6235ad1 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -334,6 +334,8 @@ list(REMOVE_ITEM TEST_OPS test_conv3d_transpose_op) # disable this unittest temporarily list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_exception) list(REMOVE_ITEM TEST_OPS test_sampling_id_op) +list(REMOVE_ITEM TEST_OPS test_paddle_save_load) + if (APPLE OR WIN32) -- GitLab From ec4155d7d000da2660bf465b6995ac3424235dba Mon Sep 17 00:00:00 2001 From: 
Wilber Date: Thu, 24 Sep 2020 15:40:03 +0800 Subject: [PATCH 073/117] windows lib size crop from 5.4G to 3.9G (#27477) --- cmake/generic.cmake | 9 +++ cmake/inference_lib.cmake | 11 ++- cmake/init.cmake | 3 + cmake/paddle_win.props | 91 +++++++++++++++++++++++ paddle/fluid/inference/CMakeLists.txt | 9 +-- paddle/fluid/inference/api/demo_ci/run.sh | 5 -- 6 files changed, 112 insertions(+), 16 deletions(-) create mode 100644 cmake/paddle_win.props diff --git a/cmake/generic.cmake b/cmake/generic.cmake index b0a6dfe2902..3bdf7c209b4 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -446,6 +446,9 @@ function(nv_library TARGET_NAME) message(FATAL "Please specify source file or library in nv_library.") endif() endif(nv_library_SRCS) + if (WIN32) + set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS ${WIN_PROPS}) + endif(WIN32) endif() endfunction(nv_library) @@ -461,6 +464,9 @@ function(nv_binary TARGET_NAME) add_dependencies(${TARGET_NAME} ${nv_binary_DEPS}) common_link(${TARGET_NAME}) endif() + if (WIN32) + set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS ${WIN_PROPS}) + endif(WIN32) endif() endfunction(nv_binary) @@ -482,6 +488,9 @@ function(nv_test TARGET_NAME) set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true) set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true) set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true) + if (WIN32) + set_target_properties(${TARGET_NAME} PROPERTIES VS_USER_PROPS ${WIN_PROPS}) + endif(WIN32) endif() endfunction(nv_test) diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index f19f0eb43d3..f4603051a0e 100644 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -19,9 +19,8 @@ set(PADDLE_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_install_dir" CACHE STRING set(PADDLE_INFERENCE_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_install_dir" CACHE STRING "A path setting paddle inference 
shared and static libraries") -# TODO(zhaolong) -# At present, the size of static lib in Windows exceeds the system limit, -# so the generation of static lib is temporarily turned off. +# At present, the size of static lib in Windows is very large, +# so we need to crop the library size. if(WIN32) #todo: remove the option option(WITH_STATIC_LIB "Compile demo with static/shared library, default use dynamic." OFF) @@ -196,7 +195,11 @@ set(PADDLE_INFERENCE_C_INSTALL_DIR "${CMAKE_BINARY_DIR}/paddle_inference_c_insta copy_part_of_thrid_party(inference_lib_dist ${PADDLE_INFERENCE_C_INSTALL_DIR}) set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid") -set(paddle_fluid_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi/libpaddle_fluid_c.*) +if(WIN32) + set(paddle_fluid_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi/${CMAKE_BUILD_TYPE}/paddle_fluid_c.*) +else(WIN32) + set(paddle_fluid_c_lib ${PADDLE_BINARY_DIR}/paddle/fluid/inference/capi/libpaddle_fluid_c.*) +endif(WIN32) copy(inference_lib_dist SRCS ${src_dir}/inference/capi/paddle_c_api.h ${paddle_fluid_c_lib} diff --git a/cmake/init.cmake b/cmake/init.cmake index 7dfe60f9dd8..902dfb11fc0 100644 --- a/cmake/init.cmake +++ b/cmake/init.cmake @@ -26,4 +26,7 @@ if(WITH_GPU) set(CMAKE_CUDA_FLAGS_MINSIZEREL "-O1 -DNDEBUG") endif() +if(WIN32) + set(WIN_PROPS ${CMAKE_SOURCE_DIR}/cmake/paddle_win.props) +endif() diff --git a/cmake/paddle_win.props b/cmake/paddle_win.props new file mode 100644 index 00000000000..7e434c6d907 --- /dev/null +++ b/cmake/paddle_win.props @@ -0,0 +1,91 @@ + + + + + + true + false + true + false + false + InheritFromHost + InheritFromHost + InheritFromHost + InheritFromHost + InheritFromHost + + -ccbin "%(VCBinDir)" -x cu [GenerateRelocatableDeviceCode] [Include] [RequiredIncludes] [InterleaveSourceInPTX] [GPUDebugInfo] [GenerateLineInfo] [Keep] [KeepDir] [MaxRegCount] [PtxAsOptionV] [TargetMachinePlatform] [NvccCompilation] [CudaRuntime] [AdditionalOptions] + --use-local-env + [CodeGeneration] + 
-clean + + -Xcompiler "/EHsc [Warning] /nologo [Optimization] [ProgramDataBaseFileName] $(CudaForceSynchronousPdbWrites) [RuntimeChecks] [Runtime] [TypeInfo]" + + %(BaseCommandLineTemplate) [CompileOut] "%(FullPath)" + %(BaseCommandLineTemplate) [HostDebugInfo] [Emulation] [FastMath] [Defines] %(HostCommandLineTemplate) [CompileOut] "%(FullPath)" + + +# (Approximate command-line. Settings inherited from host are not visible below.) +# (Please see the output window after a build for the full command-line) + +# Driver API (NVCC Compilation Type is .cubin, .gpu, or .ptx) +set CUDAFE_FLAGS=--sdk_dir "$(WindowsSdkDir)" +"$(CudaToolkitNvccPath)" %(BuildCommandLineTemplate) %(DriverApiCommandLineTemplate) + +# Runtime API (NVCC Compilation Type is hybrid object or .c file) +set CUDAFE_FLAGS=--sdk_dir "$(WindowsSdkDir)" +"$(CudaToolkitNvccPath)" %(BuildCommandLineTemplate) %(RuntimeApiCommandLineTemplate) + + Compiling CUDA source file %(Identity)... + Skipping CUDA source file %(Identity) (excluded from build). + + + %(Filename)%(Extension).cache + $(IntDir)%(PropsCacheOutputFile) + + $(MSBuildProjectFullPath) + + + + true + $(IntDir)$(TargetName).device-link.obj + + + true + + true + + InheritFromProject + InheritFromProject + + + + + + + + + + + + + -Xcompiler "/EHsc [Warning] /nologo [Optimization] [RuntimeChecks] [Runtime] [TypeInfo]" + "$(CudaToolkitNvccPath)" -dlink [LinkOut] %(HostCommandLineTemplate) [AdditionalLibraryDirectories] [AdditionalDependencies] [AdditionalOptions] [CodeGeneration] [GPUDebugInfo] [TargetMachinePlatform] [Inputs] + +# (Approximate command-line. Settings inherited from host are not visible below.) 
+# (Please see the output window after a build for the full command-line) + +%(LinkCommandLineTemplate) + + + + + %(AdditionalLibraryDirectories);$(CudaToolkitLibDir) + + + + %(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir) + + + + diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index cf6fcb7b643..f85e1f65116 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -44,14 +44,9 @@ add_subdirectory(api) set(STATIC_INFERENCE_API paddle_inference_api analysis_predictor zero_copy_tensor reset_tensor_array analysis_config paddle_pass_builder activation_functions ${mkldnn_quantizer_cfg}) -# TODO(xingzhaolong, jiweibo): remove this and create_static_lib(paddle_fluid) on windows GPU -if(WIN32 AND WITH_GPU) - cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_API}) -else() - create_static_lib(paddle_fluid ${fluid_modules} ${STATIC_INFERENCE_API}) -endif() +create_static_lib(paddle_fluid ${fluid_modules} ${STATIC_INFERENCE_API}) -if(NOT APPLE AND NOT WIN32) +if(NOT APPLE) # TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac. set(LINK_FLAGS "-Wl,--retain-symbols-file ${CMAKE_CURRENT_SOURCE_DIR}/paddle_fluid.sym") set_target_properties(paddle_fluid PROPERTIES LINK_FLAGS "${LINK_FLAGS}") diff --git a/paddle/fluid/inference/api/demo_ci/run.sh b/paddle/fluid/inference/api/demo_ci/run.sh index a3e7bec398a..6d283ca56cb 100755 --- a/paddle/fluid/inference/api/demo_ci/run.sh +++ b/paddle/fluid/inference/api/demo_ci/run.sh @@ -68,11 +68,6 @@ rm -rf * for WITH_STATIC_LIB in ON OFF; do if [ $(echo `uname` | grep "Win") != "" ]; then - # TODO(xingzhaolong, jiweibo): remove this if windows GPU library is ready. - if [ $TEST_GPU_CPU == ON] && [ $WITH_STATIC_LIB ==ON ]; then - return 0 - fi - # -----simple_on_word2vec on windows----- cmake .. 
-G "Visual Studio 14 2015" -A x64 -DPADDLE_LIB=${inference_install_dir} \ -DWITH_MKL=$TURN_ON_MKL \ -- GitLab From 59c049995e036f80fc7e068a432037a9c8a4a014 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Thu, 24 Sep 2020 15:40:31 +0800 Subject: [PATCH 074/117] change test main process wait time (#27516) --- .../fluid/tests/unittests/test_imperative_signal_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_signal_handler.py b/python/paddle/fluid/tests/unittests/test_imperative_signal_handler.py index b47834ffab8..a0da4b0efee 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_signal_handler.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_signal_handler.py @@ -49,7 +49,7 @@ class TestDygraphDataLoaderSingalHandler(unittest.TestCase): test_process.start() set_child_signal_handler(id(self), test_process.pid) - time.sleep(3) + time.sleep(5) except core.EnforceNotMet as ex: self.assertIn("FatalError", cpt.get_exception_message(ex)) exception = ex -- GitLab From d7f422c9844b33efcb02c7f388ec97f68e42ce99 Mon Sep 17 00:00:00 2001 From: Kaipeng Deng Date: Thu, 24 Sep 2020 16:07:57 +0800 Subject: [PATCH 075/117] fix error message in conv/conv_transpose. test=develop (#27464) * fix error message in conv/conv_transpose. 
test=develop --- paddle/fluid/operators/conv_cudnn_op.cu | 60 +++++++++++-------- paddle/fluid/operators/conv_op.h | 18 ++++-- .../operators/conv_transpose_cudnn_op.cu | 20 +++---- paddle/fluid/operators/conv_transpose_op.h | 12 +++- 4 files changed, 70 insertions(+), 40 deletions(-) diff --git a/paddle/fluid/operators/conv_cudnn_op.cu b/paddle/fluid/operators/conv_cudnn_op.cu index 00af724ac7f..f8b76f387cc 100644 --- a/paddle/fluid/operators/conv_cudnn_op.cu +++ b/paddle/fluid/operators/conv_cudnn_op.cu @@ -50,8 +50,9 @@ class CUDNNConvOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, - "It must use CUDAPlace."); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), true, + paddle::platform::errors::PreconditionNotMet("It must use CUDAPlace.")); const Tensor* input = ctx.Input("Input"); auto* filter = ctx.Input("Filter"); auto* output = ctx.Output("Output"); @@ -60,14 +61,16 @@ class CUDNNConvOpKernel : public framework::OpKernel { std::vector paddings = ctx.Attr>("paddings"); std::vector dilations = ctx.Attr>("dilations"); int groups = ctx.Attr("groups"); + bool exhaustive_search = FLAGS_cudnn_exhaustive_search || ctx.Attr("exhaustive_search"); + bool deterministic = FLAGS_cudnn_deterministic; + auto exhaustive_deterministic = exhaustive_search && deterministic; + PADDLE_ENFORCE_EQ(exhaustive_deterministic, false, + platform::errors::InvalidArgument( + "Cann't set exhaustive_search True and " + "FLAGS_cudnn_deterministic True at same time.")); - if (exhaustive_search && FLAGS_cudnn_deterministic) { - PADDLE_THROW( - "Cann't set exhaustive_search True and " - "FLAGS_cudnn_deterministic True at same time."); - } const std::string padding_algorithm = ctx.Attr("padding_algorithm"); const std::string data_format = ctx.Attr("data_format"); @@ -197,7 +200,8 @@ class CUDNNConvOpKernel 
: public framework::OpKernel { &transformed_input); } break; default: - PADDLE_THROW("ConvOp only support tensors with 4 or 5 dimensions."); + PADDLE_THROW(platform::errors::InvalidArgument( + "ConvOp only support tensors with 4 or 5 dimensions.")); } } else { @@ -317,8 +321,9 @@ class CUDNNConvGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, - "It must use CUDAPlace."); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), true, + paddle::platform::errors::PreconditionNotMet("It must use CUDAPlace.")); auto input = ctx.Input("Input"); auto filter = ctx.Input("Filter"); auto output_grad = ctx.Input(framework::GradVarName("Output")); @@ -337,14 +342,16 @@ class CUDNNConvGradOpKernel : public framework::OpKernel { std::vector paddings = ctx.Attr>("paddings"); std::string padding_algorithm = ctx.Attr("padding_algorithm"); int groups = ctx.Attr("groups"); + bool exhaustive_search = FLAGS_cudnn_exhaustive_search || ctx.Attr("exhaustive_search"); bool deterministic = FLAGS_cudnn_deterministic; - if (exhaustive_search && deterministic) { - PADDLE_THROW( - "Can't set exhaustive_search True and " - "FLAGS_cudnn_deterministic True at same time."); - } + auto exhaustive_deterministic = exhaustive_search && deterministic; + PADDLE_ENFORCE_EQ(exhaustive_deterministic, false, + platform::errors::InvalidArgument( + "Cann't set exhaustive_search True and " + "FLAGS_cudnn_deterministic True at same time.")); + const std::string data_format = ctx.Attr("data_format"); const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC"); @@ -495,7 +502,8 @@ class CUDNNConvGradOpKernel : public framework::OpKernel { &transformed_input); } break; default: - PADDLE_THROW("ConvOp only support tensors with 4 or 5 dimensions."); + PADDLE_THROW(platform::errors::InvalidArgument( + "ConvOp only 
support tensors with 4 or 5 dimensions.")); } } else { transformed_input.ShareDataWith(transformed_input_channel); @@ -701,8 +709,9 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, - "It must use CUDAPlace."); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), true, + paddle::platform::errors::PreconditionNotMet("It must use CUDAPlace.")); auto X = ctx.Input("Input"); auto W = ctx.Input("Filter"); auto dO = ctx.Input("DOutput"); @@ -736,14 +745,16 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel { const std::vector& strides = ctx.Attr>("strides"); std::vector dilations = ctx.Attr>("dilations"); int groups = ctx.Attr("groups"); + bool exhaustive_search = FLAGS_cudnn_exhaustive_search || ctx.Attr("exhaustive_search"); bool deterministic = FLAGS_cudnn_deterministic; - if (exhaustive_search && deterministic) { - PADDLE_THROW( - "Can't set exhaustive_search True and " - "FLAGS_cudnn_deterministic True at same time."); - } + auto exhaustive_deterministic = exhaustive_search && deterministic; + PADDLE_ENFORCE_EQ(exhaustive_deterministic, false, + platform::errors::InvalidArgument( + "Cann't set exhaustive_search True and " + "FLAGS_cudnn_deterministic True at same time.")); + std::vector paddings = ctx.Attr>("paddings"); std::string padding_algorithm = ctx.Attr("padding_algorithm"); @@ -878,7 +889,8 @@ class CUDNNConvDoubleGradOpKernel : public framework::OpKernel { } } break; default: - PADDLE_THROW("ConvOp only support tensors with 4 or 5 dimensions."); + PADDLE_THROW(platform::errors::InvalidArgument( + "ConvOp only support tensors with 4 or 5 dimensions.")); } } else { diff --git a/paddle/fluid/operators/conv_op.h b/paddle/fluid/operators/conv_op.h index 8a5345e3cf8..662fac9e77e 100644 --- a/paddle/fluid/operators/conv_op.h +++ 
b/paddle/fluid/operators/conv_op.h @@ -685,8 +685,9 @@ class GemmConvDoubleGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { auto& dev_ctx = ctx.template device_context(); - PADDLE_ENFORCE_EQ(platform::is_cpu_place(ctx.GetPlace()), true, - "It must use CPUPlace."); + PADDLE_ENFORCE_EQ( + platform::is_cpu_place(ctx.GetPlace()), true, + paddle::platform::errors::PreconditionNotMet("It must use CPUPlace.")); const Tensor* X = ctx.Input("Input"); const Tensor* dY = ctx.Input("DOutput"); const Tensor* ddX = ctx.Input("DDInput"); @@ -982,11 +983,20 @@ class DepthwiseConvKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( output->dims()[output->dims().size() - 1] % input->dims()[input->dims().size() - 1], - 0, "The output channels must be a multiple of the input channels"); + 0, platform::errors::InvalidArgument( + "ShapeError: The output channels must be a multiple of the " + "input channels. But receivced output channel number is %d " + "and input channel number is %d", + output->dims()[output->dims().size() - 1], + input->dims()[input->dims().size() - 1])); } else { PADDLE_ENFORCE_EQ( output->dims()[1] % input->dims()[1], 0, - "The output channels must be a multiple of the input channels"); + platform::errors::InvalidArgument( + "ShapeError: The output channels must be a multiple of the " + "input channels. 
But receivced output channel number is %d " + "and input channel number is %d", + output->dims()[1], input->dims()[1])); } // transform tensor Tensor transformed_input(input->type()); diff --git a/paddle/fluid/operators/conv_transpose_cudnn_op.cu b/paddle/fluid/operators/conv_transpose_cudnn_op.cu index 99ec1e04810..5249264b1c9 100644 --- a/paddle/fluid/operators/conv_transpose_cudnn_op.cu +++ b/paddle/fluid/operators/conv_transpose_cudnn_op.cu @@ -51,8 +51,9 @@ template class CUDNNConvTransposeOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true, - "It must use CUDAPlace."); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), true, + paddle::platform::errors::PreconditionNotMet("It must use CUDAPlace.")); auto* input = ctx.Input("Input"); auto* filter = ctx.Input("Filter"); auto* output = ctx.Output("Output"); @@ -145,9 +146,8 @@ class CUDNNConvTransposeOpKernel : public framework::OpKernel { ctx, input_pad, input_transpose, pad_value, &transformed_input); } break; default: - PADDLE_ENFORCE_EQ( - rank == 4 || rank == 5, true, - "Op(ConvTranspose) only supports 4-D or 5-D input Tensor."); + PADDLE_THROW(platform::errors::InvalidArgument( + "Op(ConvTranspose) only supports 4-D or 5-D input Tensor.")); } } else { transformed_input = input_transpose; @@ -290,8 +290,9 @@ template class CUDNNConvTransposeGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), - "It must use CUDAPlace."); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ctx.GetPlace()), true, + paddle::platform::errors::PreconditionNotMet("It must use CUDAPlace.")); auto input = ctx.Input("Input"); auto filter = ctx.Input("Filter"); auto output_grad = ctx.Input(framework::GradVarName("Output")); @@ -393,9 +394,8 @@ class 
CUDNNConvTransposeGradOpKernel : public framework::OpKernel { &transformed_output_grad); } break; default: - PADDLE_ENFORCE_EQ( - rank == 4 || rank == 5, true, - "Op(ConvTranspose) only supports 4-D or 5-D input Tensor."); + PADDLE_THROW(platform::errors::InvalidArgument( + "Op(ConvTranspose) only supports 4-D or 5-D input Tensor.")); } } else { transformed_output_grad = output_grad_transpose; diff --git a/paddle/fluid/operators/conv_transpose_op.h b/paddle/fluid/operators/conv_transpose_op.h index 59b3677acc4..1ea869e002a 100644 --- a/paddle/fluid/operators/conv_transpose_op.h +++ b/paddle/fluid/operators/conv_transpose_op.h @@ -580,7 +580,12 @@ class DepthwiseConvTransposeKernel : public framework::OpKernel { output->mutable_data(context.GetPlace()); int groups = context.Attr("groups"); - PADDLE_ENFORCE_EQ(groups, filter.dims()[0]); + PADDLE_ENFORCE_EQ( + groups, filter.dims()[0], + platform::errors::InvalidArgument( + "groups should be error to the 1st dimension of filter. But " + "received groups is %d and filter dimension[0] is %d", + groups, filter.dims()[0])); std::vector strides = context.Attr>("strides"); std::vector paddings = context.Attr>("paddings"); @@ -588,7 +593,10 @@ class DepthwiseConvTransposeKernel : public framework::OpKernel { std::string padding_algorithm = context.Attr("padding_algorithm"); for (auto v : dilations) { - PADDLE_ENFORCE_EQ(v, 1); + PADDLE_ENFORCE_EQ(v, 1, platform::errors::InvalidArgument( + "dilations should be 1 in depthwise conv. " + "But received dilations is %d", + v)); } auto in_dims = input->dims(); -- GitLab From 273f58a3c5cbd805ba4c2e4524d950cd34bb6674 Mon Sep 17 00:00:00 2001 From: Huihuang Zheng Date: Thu, 24 Sep 2020 16:11:10 +0800 Subject: [PATCH 076/117] Decrease Random Failure Probability for test_parallel_executor_mnist, test=develop (#27498) As the title, decrease random failure probability for test_parallel_executor_mnist The old code set larger delta when comparing reduce and all reduce, but didn't set all. 
I added it. On my linux machine, I run 100 times, no failure occurs. In addition, we only saw this random failure on CI two times since I worked. I thought it was rare and I just increased the delta. --- .../fluid/tests/unittests/test_parallel_executor_mnist.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py index a2f1d774608..da7e30ff106 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py @@ -124,8 +124,10 @@ class TestMNIST(TestParallelExecutorBase): def test_simple_fc_with_new_strategy(self): # use_cuda, use_reduce - self._compare_reduce_and_allreduce(simple_fc_net, True) - self._compare_reduce_and_allreduce(simple_fc_net, False) + # NOTE: the computation result of nccl_reduce is non-deterministic, + # related issue: https://github.com/NVIDIA/nccl/issues/157 + self._compare_reduce_and_allreduce(simple_fc_net, True, 1e-5, 1e-2) + self._compare_reduce_and_allreduce(simple_fc_net, False, 1e-5, 1e-2) def check_simple_fc_parallel_accuracy(self, use_cuda): if use_cuda and not core.is_compiled_with_cuda(): @@ -179,7 +181,7 @@ class TestMNIST(TestParallelExecutorBase): # NOTE: the computation result of nccl_reduce is non-deterministic, # related issue: https://github.com/NVIDIA/nccl/issues/157 self._compare_reduce_and_allreduce(fc_with_batchnorm, True, 1e-5, 1e-2) - self._compare_reduce_and_allreduce(fc_with_batchnorm, False) + self._compare_reduce_and_allreduce(fc_with_batchnorm, False, 1e-5, 1e-2) if __name__ == '__main__': -- GitLab From b7319ef51893a8b575dde283360fde2dbc316ce5 Mon Sep 17 00:00:00 2001 From: ruri Date: Thu, 24 Sep 2020 16:43:04 +0800 Subject: [PATCH 077/117] fix err msg in pixel shuffle op (#27503) --- paddle/fluid/operators/pixel_shuffle_op.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/paddle/fluid/operators/pixel_shuffle_op.cc b/paddle/fluid/operators/pixel_shuffle_op.cc index 70d232ad6a5..111a82c6cce 100644 --- a/paddle/fluid/operators/pixel_shuffle_op.cc +++ b/paddle/fluid/operators/pixel_shuffle_op.cc @@ -46,14 +46,14 @@ class PixelShuffleOp : public framework::OperatorWithKernel { platform::errors::InvalidArgument( "The square of upscale_factor[%u] should divide the " "number of channel[%u]", - input_dims[1], upscale_factor * upscale_factor)); + upscale_factor * upscale_factor, input_dims[1])); } else { PADDLE_ENFORCE_EQ( input_dims[3] % (upscale_factor * upscale_factor), 0, platform::errors::InvalidArgument( "The square of upscale_factor[%u] should divide the " "number of channel[%u]", - input_dims[3], upscale_factor * upscale_factor)); + upscale_factor * upscale_factor, input_dims[3])); } auto output_dims = input_dims; output_dims[0] = input_dims[0]; -- GitLab From df7fabeedc87c663b3d8e285836b3770ceb10957 Mon Sep 17 00:00:00 2001 From: Wilber Date: Thu, 24 Sep 2020 17:19:20 +0800 Subject: [PATCH 078/117] Fix memory leak for mkldnn. 
(#27493) --- .../fluid/inference/api/analysis_predictor.cc | 34 ++++++++++++++++--- .../fluid/inference/api/analysis_predictor.h | 11 ++++++ 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 42e62011f84..6c68b385bcb 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -245,7 +245,18 @@ bool AnalysisPredictor::PrepareExecutor() { void AnalysisPredictor::MkldnnPreSet(const std::vector &inputs) { #ifdef PADDLE_WITH_MKLDNN - VLOG(2) << "AnalysisPredictor::Run get_cur_mkldnn_session_id=" + std::vector> inputs_shape; + for (size_t i = 0; i < inputs.size(); ++i) { + inputs_shape.emplace_back(inputs[i].shape); + } + MkldnnPreSet(inputs_shape); +#endif +} + +void AnalysisPredictor::MkldnnPreSet( + const std::vector> &inputs_shape) { +#ifdef PADDLE_WITH_MKLDNN + VLOG(2) << "AnalysisPredictor::ZeroCopyRun get_cur_mkldnn_session_id=" << platform::MKLDNNDeviceContext::tls().get_cur_mkldnn_session_id(); // In cache clearing mode. if (config_.mkldnn_cache_capacity_ > 0) { @@ -257,9 +268,9 @@ void AnalysisPredictor::MkldnnPreSet(const std::vector &inputs) { config_.mkldnn_cache_capacity_); // Set current_input_shape for caching dynamic shape. 
std::stringstream ss; - for (size_t i = 0; i < inputs.size(); ++i) { - for (size_t j = 0; j < inputs[i].shape.size(); ++j) { - ss << inputs[i].shape[j] << "-"; + for (size_t i = 0; i < inputs_shape.size(); ++i) { + for (size_t j = 0; j < inputs_shape[i].size(); ++j) { + ss << inputs_shape[i][j] << "-"; } } VLOG(2) << "Set input shape=" << ss.str(); @@ -742,6 +753,18 @@ std::unique_ptr AnalysisPredictor::GetOutputTensor( bool AnalysisPredictor::ZeroCopyRun() { paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads()); +#ifdef PADDLE_WITH_MKLDNN + if (config_.use_mkldnn_) { + std::vector> shape_vector; + auto names = GetInputNames(); + for (size_t i = 0; i < names.size(); ++i) { + auto in_tensor = GetInputTensor(names[i]); + shape_vector.emplace_back(in_tensor->shape()); + } + MkldnnPreSet(shape_vector); + } +#endif + executor_->Run(); // Fix TensorArray reuse not cleaned bug. tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_); @@ -750,6 +773,9 @@ bool AnalysisPredictor::ZeroCopyRun() { // recover the cpu_math_library_num_threads to 1, in order to avoid thread // conflict when integrating it into deployment service. paddle::platform::SetNumThreads(1); +#ifdef PADDLE_WITH_MKLDNN + if (config_.use_mkldnn_) MkldnnPostReset(); +#endif #if defined(PADDLE_WITH_MKLML) // Frees unused memory allocated by the Intel® MKL Memory Allocator to // avoid memory leak. See: diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index 365f86c2110..c4a7173b010 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -317,6 +317,17 @@ class AnalysisPredictor : public PaddlePredictor { /// \param[in] inputs tensors /// void MkldnnPreSet(const std::vector &inputs); + + /// + /// \brief PreSet for Mkldnn multi-thread and dynamic shape input. + /// + /// Used in AnalysisPredictor::Run(), do not support + /// AnalysisPredictor::ZeroCopyRun() now. 
+ /// + /// \param[in] inputs tensor shape + /// + void MkldnnPreSet(const std::vector> &inputs_shape); + /// /// \brief PostReset for Mkldnn multi-thread and dynamic shape input. /// -- GitLab From 32ad4f90a4b9c5fc38f6480b5a024ba44f654ee2 Mon Sep 17 00:00:00 2001 From: 123malin Date: Thu, 24 Sep 2020 20:59:21 +0800 Subject: [PATCH 079/117] =?UTF-8?q?=E3=80=90paddle.fleet=E3=80=91=20Usages?= =?UTF-8?q?=20Change:=20from=20fleet.util()=20to=20fleet.util=20(#27468)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * test=develop, bug fix --- python/paddle/distributed/fleet/__init__.py | 6 +- .../distributed/fleet/base/fleet_base.py | 32 ++-------- .../distributed/fleet/base/util_factory.py | 63 ++++++++++--------- python/paddle/distributed/fleet/launch.py | 10 ++- .../distributed/fleet/utils/__init__.py | 2 + python/paddle/distributed/fleet/utils/fs.py | 52 +++++++-------- .../fluid/tests/unittests/dist_fleet_ctr.py | 7 +-- .../tests/unittests/dist_fleet_ctr_ps_gpu.py | 7 +-- .../tests/unittests/dist_fleet_heter_ctr.py | 3 +- .../tests/unittests/dist_fleet_simnet_bow.py | 5 +- .../tests/unittests/test_dist_fleet_base.py | 7 +-- .../unittests/test_dist_fleet_heter_base.py | 5 +- .../test_dist_fleet_heter_program.py | 1 - .../fluid/tests/unittests/test_fleet_base.py | 2 +- .../unittests/test_fleet_rolemaker_new.py | 12 ++-- .../fluid/tests/unittests/test_fleet_util.py | 50 ++++++++------- 16 files changed, 120 insertions(+), 144 deletions(-) diff --git a/python/paddle/distributed/fleet/__init__.py b/python/paddle/distributed/fleet/__init__.py index f3ee09a6d9e..e89cb1f5ec4 100644 --- a/python/paddle/distributed/fleet/__init__.py +++ b/python/paddle/distributed/fleet/__init__.py @@ -13,7 +13,7 @@ # limitations under the License. 
# TODO: define distributed api under this directory, -from .base.role_maker import UserDefinedRoleMaker, PaddleCloudRoleMaker +from .base.role_maker import Role, UserDefinedRoleMaker, PaddleCloudRoleMaker from .base.distributed_strategy import DistributedStrategy from .base.fleet_base import Fleet from .base.util_factory import UtilBase @@ -26,6 +26,7 @@ __all__ = [ "UserDefinedRoleMaker", "PaddleCloudRoleMaker", "Fleet", + "Role", ] fleet = Fleet() @@ -39,8 +40,7 @@ server_num = fleet.server_num server_index = fleet.server_index server_endpoints = fleet.server_endpoints is_server = fleet.is_server -set_util = fleet.set_util -util = fleet.util +util = UtilBase() barrier_worker = fleet.barrier_worker init_worker = fleet.init_worker init_server = fleet.init_server diff --git a/python/paddle/distributed/fleet/base/fleet_base.py b/python/paddle/distributed/fleet/base/fleet_base.py index d00faac8385..d0658efdca3 100644 --- a/python/paddle/distributed/fleet/base/fleet_base.py +++ b/python/paddle/distributed/fleet/base/fleet_base.py @@ -23,7 +23,6 @@ from .strategy_compiler import StrategyCompiler from .distributed_strategy import DistributedStrategy from .meta_optimizer_factory import MetaOptimizerFactory from .runtime_factory import RuntimeFactory -from .util_factory import UtilFactory from paddle.fluid.wrapped_decorator import wrap_decorator from paddle.fluid.dygraph import parallel_helper @@ -120,7 +119,6 @@ class Fleet(object): self.strategy_compiler = None self._is_collective = False self._runtime_handle = None - self._util = None def init(self, role_maker=None, is_collective=False): """ @@ -182,6 +180,9 @@ class Fleet(object): format(type(role_maker))) self._role_maker._generate_role() + import paddle.distributed.fleet as fleet + fleet.util._set_role_maker(self._role_maker) + self.strategy_compiler = StrategyCompiler() if paddle.fluid.framework.in_dygraph_mode(): if parallel_helper._is_parallel_ctx_initialized(): @@ -353,29 +354,6 @@ class Fleet(object): return 
self._role_maker._is_server( ) or self._role_maker._is_heter_worker() - def set_util(self, util): - self._util = util - - def util(self): - """ - Utility functions that can be used under certain runtime - return util - - Returns: - UtilBase: instance of UtilBase, can use distributed ops/tools easily. - - Examples: - - .. code-block:: python - import paddle.distributed.fleet as fleet - fleet.init() - util = fleet.util - files = ["1.log", "2.log", "3.log", "4.log"] - files = util.get_file_shard() - - """ - return self._util - def barrier_worker(self): """ barrier all workers @@ -1102,7 +1080,7 @@ class Fleet(object): if self._runtime_handle is None: self._runtime_handle = RuntimeFactory()._create_runtime(context) - if self._util is None: - self._util = UtilFactory()._create_util(context) + import paddle.distributed.fleet as fleet + fleet.util._set_strategy(context["valid_strategy"]) return optimize_ops, params_grads diff --git a/python/paddle/distributed/fleet/base/util_factory.py b/python/paddle/distributed/fleet/base/util_factory.py index efaa854c087..bbb7d60ed9c 100644 --- a/python/paddle/distributed/fleet/base/util_factory.py +++ b/python/paddle/distributed/fleet/base/util_factory.py @@ -73,11 +73,13 @@ class UtilBase(object): .. code-block:: python # Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` . 
- from paddle.distributed.fleet.base.util_factory import fleet_util import paddle.distributed.fleet as fleet from paddle.distributed.fleet import PaddleCloudRoleMaker import sys import numpy as np + import os + + os.environ["PADDLE_WITH_GLOO"] = "2" def train(): role = PaddleCloudRoleMaker( @@ -85,19 +87,18 @@ class UtilBase(object): init_gloo=True, path="./tmp_gloo") fleet.init(role) - fleet_util._set_role_maker(role) if fleet.is_server(): input = [1, 2] - output = fleet_util.all_reduce(input, "sum", "server") + output = fleet.util.all_reduce(input, "sum", "server") print(output) # [2, 4] elif fleet.is_worker(): input = np.array([3, 4]) - output = fleet_util.all_reduce(input, "sum", "worker") + output = fleet.util.all_reduce(input, "sum", "worker") print(output) # [6, 8] - output = fleet_util.all_reduce(input, "sum", "all") + output = fleet.util.all_reduce(input, "sum", "all") print(output) # [8, 12] if __name__ == "__main__": @@ -117,10 +118,12 @@ class UtilBase(object): .. code-block:: python # Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` . - from paddle.distributed.fleet.base.util_factory import fleet_util import paddle.distributed.fleet as fleet from paddle.distributed.fleet import PaddleCloudRoleMaker import sys + import os + + os.environ["PADDLE_WITH_GLOO"] = "2" def train(): role = PaddleCloudRoleMaker( @@ -128,15 +131,14 @@ class UtilBase(object): init_gloo=True, path="./tmp_gloo") fleet.init(role) - fleet_util._set_role_maker(role) if fleet.is_server(): - fleet_util.barrier("server") + fleet.util.barrier("server") print("all server arrive here") elif fleet.is_worker(): - fleet_util.barrier("worker") + fleet.util.barrier("worker") print("all server arrive here") - fleet_util.barrier("all") + fleet.util.barrier("all") print("all servers and workers arrive here") if __name__ == "__main__": @@ -160,10 +162,12 @@ class UtilBase(object): .. 
code-block:: python # Save the following code in `train.py` , and then execute the command `fleetrun --server_num 2 --worker_num 2 train.py` . - from paddle.distributed.fleet.base.util_factory import fleet_util import paddle.distributed.fleet as fleet from paddle.distributed.fleet import PaddleCloudRoleMaker import sys + import os + + os.environ["PADDLE_WITH_GLOO"] = "2" def train(): role = PaddleCloudRoleMaker( @@ -171,19 +175,18 @@ class UtilBase(object): init_gloo=True, path="./tmp_gloo") fleet.init(role) - fleet_util._set_role_maker(role) if fleet.is_server(): input = fleet.server_index() - output = fleet_util.all_gather(input, "server") + output = fleet.util.all_gather(input, "server") print(output) # output = [0, 1] elif fleet.is_worker(): input = fleet.worker_index() - output = fleet_util.all_gather(input, "worker") + output = fleet.util.all_gather(input, "worker") # output = [0, 1] print(output) - output = fleet_util.all_gather(input, "all") + output = fleet.util.all_gather(input, "all") print(output) # output = [0, 1, 0, 1] @@ -220,18 +223,20 @@ class UtilBase(object): .. code-block:: python - from paddle.distributed.fleet.base.util_factory import fleet_util - import paddle.distributed.fleet.base.role_maker as role_maker + import paddle.distributed.fleet as fleet + from paddle.distributed.fleet import UserDefinedRoleMaker - role = role_maker.UserDefinedRoleMaker( + role = UserDefinedRoleMaker( is_collective=False, init_gloo=False, current_id=0, - role=role_maker.Role.WORKER, + role=fleet.Role.WORKER, worker_endpoints=["127.0.0.1:6003", "127.0.0.1:6004"], server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"]) - fleet_util._set_role_maker(role) - files = fleet_util.get_file_shard(["file1", "file2", "file3"]) + fleet.init(role) + + files = fleet.util.get_file_shard(["file1", "file2", "file3"]) + print(files) # files = ["file1", "file2"] """ if not isinstance(files, list): @@ -267,18 +272,19 @@ class UtilBase(object): .. 
code-block:: python - from paddle.distributed.fleet.base.util_factory import fleet_util - import paddle.distributed.fleet.base.role_maker as role_maker + import paddle.distributed.fleet as fleet + from paddle.distributed.fleet import UserDefinedRoleMaker - role = role_maker.UserDefinedRoleMaker( + role = UserDefinedRoleMaker( is_collective=False, init_gloo=False, current_id=0, - role=role_maker.Role.WORKER, + role=fleet.Role.WORKER, worker_endpoints=["127.0.0.1:6003", "127.0.0.1:6004"], server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"]) - fleet_util._set_role_maker(role) - fleet_util.print_on_rank("I'm worker 0", 0) + fleet.init(role) + + fleet.util.print_on_rank("I'm worker 0", 0) """ if self.role_maker._worker_index() != rank_id: return @@ -577,6 +583,3 @@ class UtilBase(object): print("fetch_targets name: %s" % v.name) print("fetch_targets: {}".format(results[i])) return results - - -fleet_util = UtilFactory()._create_util(None) diff --git a/python/paddle/distributed/fleet/launch.py b/python/paddle/distributed/fleet/launch.py index d63c9f9184c..c589e4f26a0 100644 --- a/python/paddle/distributed/fleet/launch.py +++ b/python/paddle/distributed/fleet/launch.py @@ -181,8 +181,8 @@ def get_gpus(gpus): cuda_visible_devices_list = cuda_visible_devices.split(',') for x in gpus.split(','): assert x in cuda_visible_devices_list, "Can't find "\ - "your gpus %s in CUDA_VISIBLE_DEVICES[%s]."\ - % (x, cuda_visible_devices) + "your gpus %s in CUDA_VISIBLE_DEVICES[%s]."\ + % (x, cuda_visible_devices) res_gpus = [ cuda_visible_devices_list.index(x.strip()) for x in gpus.split(',') @@ -348,8 +348,7 @@ def launch_ps(args): "PADDLE_PORT": cur_server.endpoint.split(":")[1], "TRAINING_ROLE": "PSERVER", "PADDLE_TRAINERS_NUM": str(worker_num), - "POD_IP": cur_server.endpoint.split(":")[0], - "PADDLE_WITH_GLOO": "1" + "POD_IP": cur_server.endpoint.split(":")[0] } current_env.update(proc_env) @@ -388,8 +387,7 @@ def launch_ps(args): "PADDLE_TRAINER_ENDPOINTS": worker_endpoints, 
"PADDLE_TRAINERS_NUM": str(worker_num), "TRAINING_ROLE": "TRAINER", - "PADDLE_TRAINER_ID": str(cur_worker.rank), - "PADDLE_WITH_GLOO": "1" + "PADDLE_TRAINER_ID": str(cur_worker.rank) } current_env.update(proc_env) diff --git a/python/paddle/distributed/fleet/utils/__init__.py b/python/paddle/distributed/fleet/utils/__init__.py index abf198b97e6..a45e1682c3f 100644 --- a/python/paddle/distributed/fleet/utils/__init__.py +++ b/python/paddle/distributed/fleet/utils/__init__.py @@ -11,3 +11,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +from .fs import LocalFS, HDFSClient diff --git a/python/paddle/distributed/fleet/utils/fs.py b/python/paddle/distributed/fleet/utils/fs.py index b7c50bda3ea..8d4f24fb116 100644 --- a/python/paddle/distributed/fleet/utils/fs.py +++ b/python/paddle/distributed/fleet/utils/fs.py @@ -120,7 +120,7 @@ class LocalFS(FS): Examples: .. code-block:: python - from paddle.distributed.fleet.utils.fs import LocalFS + from paddle.distributed.fleet.utils import LocalFS client = LocalFS() subdirs, files = client.ls_dir("./") @@ -140,7 +140,7 @@ class LocalFS(FS): Examples: .. code-block:: python - from paddle.distributed.fleet.utils.fs import LocalFS + from paddle.distributed.fleet.utils import LocalFS client = LocalFS() subdirs, files = client.ls_dir("./") @@ -160,7 +160,7 @@ class LocalFS(FS): def mkdirs(self, fs_path): """ - Create a remote HDFS directory. + Create a local directory. Args: fs_path(str): The local directory path. @@ -168,7 +168,7 @@ class LocalFS(FS): Examples: .. code-block:: python - from paddle.distributed.fleet.utils.fs import LocalFS + from paddle.distributed.fleet.utils import LocalFS client = LocalFS() client.mkdirs("test_mkdirs") @@ -189,7 +189,7 @@ class LocalFS(FS): Examples: .. 
code-block:: python - from paddle.distributed.fleet.utils.fs import LocalFS + from paddle.distributed.fleet.utils import LocalFS client = LocalFS() client.touch("test_rename_src") @@ -217,7 +217,7 @@ class LocalFS(FS): Examples: .. code-block:: python - from paddle.distributed.fleet.utils.fs import LocalFS + from paddle.distributed.fleet.utils import LocalFS client = LocalFS() client.mkdirs("test_localFS_mkdirs") @@ -247,7 +247,7 @@ class LocalFS(FS): Examples: .. code-block:: python - from paddle.distributed.fleet.utils.fs import LocalFS + from paddle.distributed.fleet.utils import LocalFS client = LocalFS() client.touch("test_is_file") @@ -269,7 +269,7 @@ class LocalFS(FS): Examples: .. code-block:: python - from paddle.distributed.fleet.utils.fs import LocalFS + from paddle.distributed.fleet.utils import LocalFS client = LocalFS() client.mkdirs("test_is_dir") @@ -292,7 +292,7 @@ class LocalFS(FS): Examples: .. code-block:: python - from paddle.distributed.fleet.utils.fs import LocalFS + from paddle.distributed.fleet.utils import LocalFS client = LocalFS() ret = local_fs.is_exist("test_is_exist") @@ -311,7 +311,7 @@ class LocalFS(FS): Examples: .. code-block:: python - from paddle.distributed.fleet.utils.fs import LocalFS + from paddle.distributed.fleet.utils import LocalFS client = LocalFS() client.touch("test_touch") @@ -332,13 +332,11 @@ class LocalFS(FS): src_path(str): Name of the file or directory, that's needed to be moved. dst_path(str): Name of the file or directory to which to move to. overwrite(bool): Whether to re-write `dst_path` if that exists. Default is False. - test_exists(bool): Check the existence of `src_path` and `dst_path` . - When `test_exists` is set true, if `src_path` doesn't exist or `dst_path` exists, program will throw an Excetption. Examples: .. 
code-block:: python - from paddle.distributed.fleet.utils.fs import LocalFS + from paddle.distributed.fleet.utils import LocalFS client = LocalFS() client.touch("test_mv_src") @@ -369,7 +367,7 @@ class LocalFS(FS): Examples: .. code-block:: python - from paddle.distributed.fleet.utils.fs import LocalFS + from paddle.distributed.fleet.utils import LocalFS client = LocalFS() subdirs = client.list_dirs("./") @@ -432,7 +430,7 @@ class HDFSClient(FS): .. code-block:: text - from paddle.distributed.fleet.utils.fs import HDFSClient + from paddle.distributed.fleet.utils import HDFSClient hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { @@ -493,7 +491,7 @@ class HDFSClient(FS): .. code-block:: text - from paddle.distributed.fleet.utils.fs import HDFSClient + from paddle.distributed.fleet.utils import HDFSClient hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { @@ -526,7 +524,7 @@ class HDFSClient(FS): .. code-block:: text - from paddle.distributed.fleet.utils.fs import HDFSClient + from paddle.distributed.fleet.utils import HDFSClient hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { @@ -587,7 +585,7 @@ class HDFSClient(FS): .. code-block:: text - from paddle.distributed.fleet.utils.fs import HDFSClient + from paddle.distributed.fleet.utils import HDFSClient hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { @@ -629,7 +627,7 @@ class HDFSClient(FS): .. code-block:: text - from paddle.distributed.fleet.utils.fs import HDFSClient + from paddle.distributed.fleet.utils import HDFSClient hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { @@ -661,7 +659,7 @@ class HDFSClient(FS): .. code-block:: text - from paddle.distributed.fleet.utils.fs import HDFSClient + from paddle.distributed.fleet.utils import HDFSClient hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { @@ -695,7 +693,7 @@ class HDFSClient(FS): .. 
code-block:: text - from paddle.distributed.fleet.utils.fs import HDFSClient + from paddle.distributed.fleet.utils import HDFSClient hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { @@ -740,7 +738,7 @@ class HDFSClient(FS): .. code-block:: text - from paddle.distributed.fleet.utils.fs import HDFSClient + from paddle.distributed.fleet.utils import HDFSClient hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { @@ -784,7 +782,7 @@ class HDFSClient(FS): .. code-block:: text - from paddle.distributed.fleet.utils.fs import HDFSClient + from paddle.distributed.fleet.utils import HDFSClient hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { @@ -830,7 +828,7 @@ class HDFSClient(FS): .. code-block:: text - from paddle.distributed.fleet.utils.fs import HDFSClient + from paddle.distributed.fleet.utils import HDFSClient hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { @@ -893,7 +891,7 @@ class HDFSClient(FS): .. code-block:: text - from paddle.distributed.fleet.utils.fs import HDFSClient + from paddle.distributed.fleet.utils import HDFSClient hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { @@ -919,12 +917,14 @@ class HDFSClient(FS): Args: fs_path(str): The HDFS file path. + exist_ok(bool): When `fs_path` exists, if `exist_ok` is set false, + program will throw an Exception. Default is true. Examples: .. 
code-block:: text - from paddle.distributed.fleet.utils.fs import HDFSClient + from paddle.distributed.fleet.utils import HDFSClient hadoop_home = "/home/client/hadoop-client/hadoop/" configs = { diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py index 5721445c414..f650dd0f7e9 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py @@ -28,7 +28,6 @@ import numpy as np import ctr_dataset_reader from test_dist_fleet_base import runtime_main, FleetDistRunnerBase -from paddle.distributed.fleet.base.util_factory import fleet_util paddle.enable_static() @@ -180,13 +179,13 @@ class TestDistCTR2x2(FleetDistRunnerBase): fetch_list=[self.avg_cost.name]) loss_val = np.mean(loss_val) # TODO(randomly fail) - # reduce_output = fleet_util.all_reduce( + # reduce_output = fleet.util.all_reduce( # np.array(loss_val), mode="sum") - # loss_all_trainer = fleet_util.all_gather(float(loss_val)) + # loss_all_trainer = fleet.util.all_gather(float(loss_val)) # loss_val = float(reduce_output) / len(loss_all_trainer) message = "TRAIN ---> pass: {} loss: {}\n".format(epoch_id, loss_val) - fleet_util.print_on_rank(message, 0) + fleet.util.print_on_rank(message, 0) pass_time = time.time() - pass_start except fluid.core.EOFException: diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py index 3852b225234..7accc917f80 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr_ps_gpu.py @@ -29,7 +29,6 @@ import numpy as np import ctr_dataset_reader from test_dist_fleet_base import runtime_main, FleetDistRunnerBase from dist_fleet_ctr import TestDistCTR2x2, fake_ctr_reader -from paddle.distributed.fleet.base.util_factory import fleet_util # Fix seed for test fluid.default_startup_program().random_seed = 1 
@@ -76,13 +75,13 @@ class TestDistGpuPsCTR2x2(TestDistCTR2x2): loss_val = exe.run(program=fleet.main_program, fetch_list=[self.avg_cost.name]) loss_val = np.mean(loss_val) - reduce_output = fleet_util.all_reduce( + reduce_output = fleet.util.all_reduce( np.array(loss_val), mode="sum") - loss_all_trainer = fleet_util.all_gather(float(loss_val)) + loss_all_trainer = fleet.util.all_gather(float(loss_val)) loss_val = float(reduce_output) / len(loss_all_trainer) message = "TRAIN ---> pass: {} loss: {}\n".format(epoch_id, loss_val) - fleet_util.print_on_rank(message, 0) + fleet.util.print_on_rank(message, 0) pass_time = time.time() - pass_start except fluid.core.EOFException: diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py index 470fb98d799..f62ad66e462 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py @@ -29,7 +29,6 @@ import numpy as np import ctr_dataset_reader from test_dist_fleet_heter_base import runtime_main, FleetDistHeterRunnerBase from dist_fleet_ctr import TestDistCTR2x2, fake_ctr_reader -from paddle.distributed.fleet.base.util_factory import fleet_util paddle.enable_static() @@ -182,7 +181,7 @@ class TestHeterPsCTR2x2(FleetDistHeterRunnerBase): thread_num = int(os.getenv("CPU_NUM", 2)) batch_size = 128 - filelist = fleet_util.get_file_shard(train_file_list) + filelist = fleet.util.get_file_shard(train_file_list) print("filelist: {}".format(filelist)) # config dataset diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py index ff848488739..fb7ddef862d 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py @@ -32,7 +32,6 @@ import os import signal from functools import reduce from test_dist_fleet_base import runtime_main, 
FleetDistRunnerBase -from paddle.distributed.fleet.base.util_factory import fleet_util paddle.enable_static() @@ -198,7 +197,7 @@ class TestDistSimnetBow2x2(FleetDistRunnerBase): def net(self, args, batch_size=4, lr=0.01): avg_cost, _, predict, self.reader = \ train_network(batch_size=batch_size, is_distributed=False, - is_sparse=True, is_self_contained_lr=False, is_pyreader=(args.reader == "pyreader")) + is_sparse=True, is_self_contained_lr=False, is_pyreader=(args.reader == "pyreader")) self.avg_cost = avg_cost self.predict = predict @@ -238,7 +237,7 @@ class TestDistSimnetBow2x2(FleetDistRunnerBase): loss_val = np.mean(loss_val) message = "TRAIN ---> pass: {} loss: {}\n".format(epoch_id, loss_val) - fleet_util.print_on_rank(message, 0) + fleet.util.print_on_rank(message, 0) pass_time = time.time() - pass_start except fluid.core.EOFException: diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py index 3a923dbf3f7..c46d1dc5b0f 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py @@ -34,8 +34,7 @@ import unittest import paddle import paddle.fluid as fluid import paddle.distributed.fleet.base.role_maker as role_maker -from paddle.distributed.fleet.base.util_factory import fleet_util -from paddle.distributed.fleet import fleet +import paddle.distributed.fleet as fleet from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory __all__ = ['FleetDistRunnerBase', 'TestFleetBase', 'runtime_main'] @@ -97,7 +96,7 @@ class FleetDistRunnerBase(object): self.dump_fields_path = os.getenv("dump_fields_path", "") debug = int(os.getenv("Debug", "0")) # TODO(update strategy to support dump params) - if False: #debug: + if False: # debug: self.strategy.set_debug_opt({ "dump_param": self.dump_param, "dump_fields": self.dump_fields, @@ -372,8 +371,6 @@ def 
runtime_main(test_class): strategy = model.build_strategy(args) avg_cost = model.net(args) model.build_optimizer(avg_cost, strategy) - fleet_util._set_strategy(strategy) - fleet_util._set_role_maker(role) if args.role == "pserver": model.run_pserver(args) else: diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py index 4d744c8299f..ba97c5079bd 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py @@ -34,8 +34,7 @@ import unittest import paddle import paddle.fluid as fluid import paddle.distributed.fleet.base.role_maker as role_maker -from paddle.distributed.fleet.base.util_factory import fleet_util -from paddle.distributed.fleet import fleet +import paddle.distributed.fleet as fleet __all__ = ['FleetDistHeterRunnerBase', 'TestFleetHeterBase', 'runtime_main'] @@ -376,8 +375,6 @@ def runtime_main(test_class): strategy = model.build_strategy(args) avg_cost = model.net(args) model.build_optimizer(avg_cost, strategy) - fleet_util._set_strategy(strategy) - fleet_util._set_role_maker(role) if args.role == "pserver" or args.role == "heter_trainer": model.run_pserver(args) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py index 00301f9b1c6..7f4e5d99e02 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py @@ -19,7 +19,6 @@ import os import math import paddle.fluid as fluid import paddle.distributed.fleet.base.role_maker as role_maker -from paddle.distributed.fleet.base.util_factory import fleet_util from paddle.distributed.fleet import fleet import paddle diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base.py b/python/paddle/fluid/tests/unittests/test_fleet_base.py index 
45597e7253c..ccd57c4d515 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_base.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_base.py @@ -107,7 +107,7 @@ class TestFleetBase(unittest.TestCase): def test_util(self): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - self.assertEqual(fleet.util(), None) + self.assertNotEqual(fleet.util, None) def test_barrier_worker(self): role = role_maker.PaddleCloudRoleMaker(is_collective=True) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py index 4dd254af251..992fbbbe266 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_rolemaker_new.py @@ -436,12 +436,12 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): optimizer.minimize(avg_cost) comm_world = "server" - fleet.util().barrier(comm_world) + fleet.util.barrier(comm_world) - gather = fleet.util().all_gather(1, comm_world) + gather = fleet.util.all_gather(1, comm_world) self.assertEqual(gather[0], 1) - all_reduce = fleet.util().all_reduce(1, "sum", comm_world) + all_reduce = fleet.util.all_reduce(1, "sum", comm_world) self.assertEqual(1, all_reduce) self.clean(tmp) @@ -752,12 +752,12 @@ class TestGlooWithCloudRoleMaker(unittest.TestCase): optimizer.minimize(avg_cost) comm_world = "server" - fleet.util().barrier(comm_world) + fleet.util.barrier(comm_world) - gather = fleet.util().all_gather(1, comm_world) + gather = fleet.util.all_gather(1, comm_world) self.assertEqual(gather[0], 1) - all_reduce = fleet.util().all_reduce(1, "sum", comm_world) + all_reduce = fleet.util.all_reduce(1, "sum", comm_world) self.assertEqual(1, all_reduce) self.clean(tmp) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_util.py b/python/paddle/fluid/tests/unittests/test_fleet_util.py index 1570912e740..b5c22b192a1 100644 --- 
a/python/paddle/fluid/tests/unittests/test_fleet_util.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_util.py @@ -22,7 +22,6 @@ import tempfile import os import sys from paddle.dataset.common import download, DATA_HOME -from paddle.distributed.fleet.base.util_factory import fleet_util import paddle.distributed.fleet.base.role_maker as role_maker @@ -59,8 +58,7 @@ class TestFleetUtil(unittest.TestCase): import paddle.distributed.fleet.base.role_maker as role_maker role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) - default_util = fleet.util() - self.assertEqual(default_util, None) + self.assertNotEqual(fleet.util, None) def test_set_user_defined_util(self): import paddle.distributed.fleet as fleet @@ -76,17 +74,19 @@ class TestFleetUtil(unittest.TestCase): role = role_maker.PaddleCloudRoleMaker(is_collective=True) fleet.init(role) my_util = UserDefinedUtil() - fleet.set_util(my_util) - user_id = fleet.util().get_user_id() + fleet.util = my_util + user_id = fleet.util.get_user_id() self.assertEqual(user_id, 10) def test_fs(self): - from paddle.distributed.fleet.utils.fs import LocalFS + import paddle.distributed.fleet as fleet + from paddle.distributed.fleet.utils import LocalFS + fs = LocalFS() dirs, files = fs.ls_dir("test_tmp") dirs, files = fs.ls_dir("./") self.assertFalse(fs.need_upload_download()) - fleet_util._set_file_system(fs) + fleet.util._set_file_system(fs) def download_files(self): path = download(self.proto_data_url, self.module_name, @@ -98,7 +98,8 @@ class TestFleetUtil(unittest.TestCase): return unzip_folder def test_get_file_shard(self): - self.assertRaises(Exception, fleet_util.get_file_shard, "files") + import paddle.distributed.fleet as fleet + self.assertRaises(Exception, fleet.util.get_file_shard, "files") try: import netifaces except: @@ -112,18 +113,20 @@ class TestFleetUtil(unittest.TestCase): role=role_maker.Role.WORKER, worker_endpoints=["127.0.0.1:6003", "127.0.0.1:6004"], 
server_endpoints=["127.0.0.1:6001", "127.0.0.1:6002"]) - fleet_util._set_role_maker(role) - files = fleet_util.get_file_shard(["1", "2", "3"]) + fleet.init(role) + + files = fleet.util.get_file_shard(["1", "2", "3"]) self.assertTrue(len(files) == 2 and "1" in files and "2" in files) def test_program_type_trans(self): + import paddle.distributed.fleet as fleet data_dir = self.download_files() program_dir = os.path.join(data_dir, self.pruned_dir) text_program = "pruned_main_program.pbtxt" binary_program = "pruned_main_program.bin" - text_to_binary = fleet_util._program_type_trans(program_dir, + text_to_binary = fleet.util._program_type_trans(program_dir, text_program, True) - binary_to_text = fleet_util._program_type_trans(program_dir, + binary_to_text = fleet.util._program_type_trans(program_dir, binary_program, False) self.assertTrue( os.path.exists(os.path.join(program_dir, text_to_binary))) @@ -131,6 +134,7 @@ class TestFleetUtil(unittest.TestCase): os.path.exists(os.path.join(program_dir, binary_to_text))) def test_prams_check(self): + import paddle.distributed.fleet as fleet data_dir = self.download_files() class config: @@ -160,11 +164,11 @@ class TestFleetUtil(unittest.TestCase): # test saved var's shape conf.dump_program_filename = "pruned_main_program.save_var_shape_not_match" - self.assertRaises(Exception, fleet_util._params_check) + self.assertRaises(Exception, fleet.util._params_check) # test program.proto without feed_op and fetch_op conf.dump_program_filename = "pruned_main_program.no_feed_fetch" - results = fleet_util._params_check(conf) + results = fleet.util._params_check(conf) self.assertTrue(len(results) == 1) np.testing.assert_array_almost_equal( results[0], np.array( @@ -172,11 +176,11 @@ class TestFleetUtil(unittest.TestCase): # test feed_var's shape conf.dump_program_filename = "pruned_main_program.feed_var_shape_not_match" - self.assertRaises(Exception, fleet_util._params_check) + self.assertRaises(Exception, fleet.util._params_check) # test 
correct case with feed_vars_filelist conf.dump_program_filename = "pruned_main_program.pbtxt" - results = fleet_util._params_check(conf) + results = fleet.util._params_check(conf) self.assertTrue(len(results) == 1) np.testing.assert_array_almost_equal( results[0], np.array( @@ -186,13 +190,14 @@ class TestFleetUtil(unittest.TestCase): conf.feed_config.feeded_vars_filelist = None # test feed var with lod_level >= 2 conf.dump_program_filename = "pruned_main_program.feed_lod2" - self.assertRaises(Exception, fleet_util._params_check) + self.assertRaises(Exception, fleet.util._params_check) conf.dump_program_filename = "pruned_main_program.pbtxt" - results = fleet_util._params_check(conf) + results = fleet.util._params_check(conf) self.assertTrue(len(results) == 1) def test_proto_check(self): + import paddle.distributed.fleet as fleet data_dir = self.download_files() class config: @@ -210,7 +215,7 @@ class TestFleetUtil(unittest.TestCase): "pruned_main_program.save_var_shape_not_match")) conf.is_text_pruned_program = True conf.draw = False - res = fleet_util._proto_check(conf) + res = fleet.util._proto_check(conf) self.assertFalse(res) # test match @@ -222,10 +227,11 @@ class TestFleetUtil(unittest.TestCase): else: conf.draw = True conf.draw_out_name = "pruned_check" - res = fleet_util._proto_check(conf) + res = fleet.util._proto_check(conf) self.assertTrue(res) def test_visualize(self): + import paddle.distributed.fleet as fleet if sys.platform == 'win32' or sys.platform == 'sys.platform': pass else: @@ -234,10 +240,10 @@ class TestFleetUtil(unittest.TestCase): data_dir, os.path.join(self.train_dir, "join_main_program.pbtxt")) is_text = True - program = fleet_util._load_program(program_path, is_text) + program = fleet.util._load_program(program_path, is_text) output_dir = os.path.join(data_dir, self.train_dir) output_filename = "draw_prog" - fleet_util._visualize_graphviz(program, output_dir, output_filename) + fleet.util._visualize_graphviz(program, output_dir, 
output_filename) self.assertTrue( os.path.exists( os.path.join(output_dir, output_filename + ".dot"))) -- GitLab From 3d5522146e34a44aeaa9916fb46f0877cb0894af Mon Sep 17 00:00:00 2001 From: Wilber Date: Thu, 24 Sep 2020 21:30:23 +0800 Subject: [PATCH 080/117] register seq_concat_fc_fuse pass. (#27479) --- .../framework/ir/seq_concat_fc_fuse_pass.cc | 13 ++++++++ .../inference/test_seq_concat_fc_fuse_pass.py | 33 +++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_seq_concat_fc_fuse_pass.py diff --git a/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc b/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc index 7daa9b5eff7..4101d593086 100644 --- a/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc +++ b/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc @@ -17,6 +17,7 @@ #include #include #include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/framework/op_version_registry.h" namespace paddle { namespace framework { @@ -255,3 +256,15 @@ void SeqConcatFcFusePass::ApplyImpl(ir::Graph* graph) const { REGISTER_PASS(seq_concat_fc_fuse_pass, paddle::framework::ir::SeqConcatFcFusePass); +REGISTER_PASS_CAPABILITY(seq_concat_fc_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("sequence_expand", 0) + .EQ("concat", 0) + .EQ("mul", 0) + .EQ("elementwise_add", 0) + .EQ("sigmoid", 0) + .EQ("tanh", 0) + .EQ("relu", 0) + .EQ("identity", 0) + .EQ("fusion_seqexpand_concat_fc", 0)); diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_seq_concat_fc_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_seq_concat_fc_fuse_pass.py new file mode 100644 index 00000000000..33f215dafda --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_seq_concat_fc_fuse_pass.py @@ -0,0 +1,33 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import AnalysisConfig +from paddle.fluid.core import PassVersionChecker + + +class SeqConcatFCFusePassTest(InferencePassTest): + def test_compatible(self): + self.assertTrue( + PassVersionChecker.IsCompatible('seq_concat_fc_fuse_pass')) + + +if __name__ == "__main__": + unittest.main() -- GitLab From 8473ee9daedf46f11089605826531ce1ddc0eeb1 Mon Sep 17 00:00:00 2001 From: Kaipeng Deng Date: Thu, 24 Sep 2020 22:09:11 +0800 Subject: [PATCH 081/117] make places of DataLoader optional. (#27432) * make places of DataLoader optional. test=develop --- python/paddle/fluid/reader.py | 89 ++++++++++--------- .../test_multiprocess_dataloader_dynamic.py | 1 - ...ess_dataloader_iterable_dataset_dynamic.py | 1 - 3 files changed, 45 insertions(+), 46 deletions(-) diff --git a/python/paddle/fluid/reader.py b/python/paddle/fluid/reader.py index 533222531f9..6cc00a7fd37 100644 --- a/python/paddle/fluid/reader.py +++ b/python/paddle/fluid/reader.py @@ -167,10 +167,10 @@ class DataLoader(object): The variables should be created by :code:`fluid.data()`. :attr:`feed_list` must be set if :attr:`return_list` is False. Default None. 
- places(list(Place)|tuple(Place)): a list of Place, to put data - onto, :attr:`places` must be set in both static graph and - dynamic graph mode, in dynamic graph mode, place number must - be 1. Default None. + places(list(Place)|tuple(Place)|optional): a list of Place, + to put data onto, :attr:`places` can be None, if + :attr:`places` is None, default place(CPUPlace or CUDAPlace(0)) + will be used. Default None. return_list (bool): whether the return value on each device is presented as a list. If :attr:`return_list=False`, the return value on each device would be a dict of str -> LoDTensor, where @@ -222,6 +222,8 @@ class DataLoader(object): .. code-block:: python import numpy as np + + import paddle import paddle.fluid as fluid from paddle.io import Dataset, BatchSampler, DataLoader @@ -247,11 +249,48 @@ class DataLoader(object): def __len__(self): return self.num_samples + dataset = RandomDataset(BATCH_NUM * BATCH_SIZE) + # get places places = fluid.cuda_places() if USE_GPU else fluid.cpu_places() + # --------------------- dygraph mode -------------------- + + class SimpleNet(fluid.dygraph.Layer): + def __init__(self): + super(SimpleNet, self).__init__() + self.fc = fluid.dygraph.nn.Linear(IMAGE_SIZE, CLASS_NUM, act='softmax') + + def forward(self, image, label=None): + return self.fc(image) + + with fluid.dygraph.guard(places[0]): + simple_net = SimpleNet() + opt = fluid.optimizer.SGD(learning_rate=1e-3, + parameter_list=simple_net.parameters()) + + loader = DataLoader(dataset, + batch_size=BATCH_SIZE, + shuffle=True, + drop_last=True, + num_workers=2) + + for e in range(EPOCH_NUM): + for i, (image, label) in enumerate(loader()): + out = simple_net(image) + loss = fluid.layers.cross_entropy(out, label) + avg_loss = fluid.layers.reduce_mean(loss) + avg_loss.backward() + opt.minimize(avg_loss) + simple_net.clear_gradients() + print("Epoch {} batch {}: loss = {}".format(e, i, np.mean(loss.numpy()))) + + # ------------------------------------------------------- 
+ # -------------------- static graph --------------------- + paddle.enable_static() + def simple_net(image, label): fc_tmp = fluid.layers.fc(image, size=CLASS_NUM, act='softmax') cross_entropy = fluid.layers.softmax_with_cross_entropy(image, label) @@ -270,11 +309,8 @@ class DataLoader(object): prog = fluid.CompiledProgram(fluid.default_main_program()).with_data_parallel(loss_name=loss.name) - dataset = RandomDataset(BATCH_NUM * BATCH_SIZE) - loader = DataLoader(dataset, feed_list=[image, label], - places=places, batch_size=BATCH_SIZE, shuffle=True, drop_last=True, @@ -287,39 +323,6 @@ class DataLoader(object): # ------------------------------------------------------- - # --------------------- dygraph mode -------------------- - - class SimpleNet(fluid.dygraph.Layer): - def __init__(self): - super(SimpleNet, self).__init__() - self.fc = fluid.dygraph.nn.Linear(IMAGE_SIZE, CLASS_NUM, act='softmax') - - def forward(self, image, label=None): - return self.fc(image) - - with fluid.dygraph.guard(places[0]): - simple_net = SimpleNet() - opt = fluid.optimizer.SGD(learning_rate=1e-3, - parameter_list=simple_net.parameters()) - - loader = DataLoader(dataset, - places=places[0], - batch_size=BATCH_SIZE, - shuffle=True, - drop_last=True, - num_workers=2) - - for e in range(EPOCH_NUM): - for i, (image, label) in enumerate(loader()): - out = simple_net(image) - loss = fluid.layers.cross_entropy(out, label) - avg_loss = fluid.layers.reduce_mean(loss) - avg_loss.backward() - opt.minimize(avg_loss) - simple_net.clear_gradients() - print("Epoch {} batch {}: loss = {}".format(e, i, np.mean(loss.numpy()))) - - # ------------------------------------------------------- .. 
note:: For reading iterable dataset with multiprocess Dataloader, @@ -356,11 +359,9 @@ class DataLoader(object): "feed_list should be set when return_list=False" self.feed_list = feed_list - assert places is not None, "places cannot be None" + if places is None: + places = _current_expected_place() self.places = _convert_places(places) - if in_dygraph_mode(): - assert len(self.places) == 1, \ - "Number of places must be 1 in dygraph mode" assert num_workers >= 0, "num_workers should be a non-negative value" if num_workers > 0 and (sys.platform == 'darwin' or diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py index 0706eb53d53..1bb720673e4 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dynamic.py @@ -76,7 +76,6 @@ class TestDygraphDataLoader(unittest.TestCase): dataset = RandomDataset(SAMPLE_NUM, CLASS_NUM) dataloader = DataLoader( dataset, - places=places, num_workers=num_workers, batch_size=BATCH_SIZE, drop_last=True) diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py index 8f0209406fd..af332d8e432 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_dynamic.py @@ -76,7 +76,6 @@ class TestDygraphDataLoader(unittest.TestCase): dataset = RandomDataset(SAMPLE_NUM, CLASS_NUM) dataloader = DataLoader( dataset, - places=places, num_workers=num_workers, batch_size=BATCH_SIZE, drop_last=True) -- GitLab From d20349b548da26200aad0110d7a5ed7b678a9f5c Mon Sep 17 00:00:00 2001 From: Zhou Wei <52485244+zhouwei25@users.noreply.github.com> Date: Thu, 24 Sep 2020 
22:14:25 +0800 Subject: [PATCH 082/117] add unittest count ,install check on windows (#27492) * add unittest count of windows * Reduce the number of retries --- cmake/generic.cmake | 1 + paddle/scripts/installation_validate.py | 1 + paddle/scripts/paddle_build.bat | 17 ++++++++++------- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 3bdf7c209b4..a2386265367 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -721,6 +721,7 @@ function(proto_library TARGET_NAME) set(proto_hdrs) paddle_protobuf_generate_cpp(proto_srcs proto_hdrs ${proto_library_SRCS}) cc_library(${TARGET_NAME} SRCS ${proto_srcs} DEPS ${proto_library_DEPS} protobuf) + add_dependencies(extern_xxhash ${TARGET_NAME}) endfunction() function(py_proto_compile TARGET_NAME) diff --git a/paddle/scripts/installation_validate.py b/paddle/scripts/installation_validate.py index f84e2f4b176..b765291a3b8 100644 --- a/paddle/scripts/installation_validate.py +++ b/paddle/scripts/installation_validate.py @@ -15,4 +15,5 @@ import paddle.fluid as fluid import paddle as pd +fluid.install_check.run_check() print(pd.__version__) diff --git a/paddle/scripts/paddle_build.bat b/paddle/scripts/paddle_build.bat index 524c086c079..7ad2787d181 100644 --- a/paddle/scripts/paddle_build.bat +++ b/paddle/scripts/paddle_build.bat @@ -40,6 +40,7 @@ if not defined WITH_TPCACHE set WITH_TPCACHE=ON rem -------set cache build work directory----------- +rmdir build\python /s/q if "%WITH_CACHE%"=="OFF" ( rmdir build /s/q goto :mkbuild @@ -48,10 +49,10 @@ if "%WITH_CACHE%"=="OFF" ( for /F %%# in ('wmic os get localdatetime^|findstr 20') do set datetime=%%# set day_now=%datetime:~6,2% set day_before=-1 -set /p day_before= day.txt - type day.txt + echo %day_now% > %work_dir%\..\day.txt + type %work_dir%\..\day.txt rmdir build /s/q ) git diff origin/develop --stat --name-only | findstr "cmake CMakeLists.txt paddle_build.bat" @@ -208,7 +209,7 @@ echo Build third_party the 
%build_times% time: msbuild /m /p:Configuration=Release /verbosity:quiet third_party.vcxproj if %ERRORLEVEL% NEQ 0 ( set /a build_times=%build_times%+1 - if %build_times% GTR 3 ( + if %build_times% GTR 2 ( exit /b 7 ) else ( echo Build third_party failed, will retry! @@ -223,7 +224,7 @@ echo Build Paddle the %build_times% time: msbuild /m:%PARALLEL_PROJECT_COUNT% /p:TrackFileAccess=false /p:CLToolExe=clcache.exe /p:CLToolPath=%PYTHON_ROOT%\Scripts /p:Configuration=Release /verbosity:minimal paddle.sln if %ERRORLEVEL% NEQ 0 ( set /a build_times=%build_times%+1 - if %build_times% GTR 2 ( + if %build_times% GTR 1 ( exit /b 7 ) else ( echo Build Paddle failed, will retry! @@ -301,6 +302,7 @@ goto:eof call paddle_winci\Scripts\deactivate.bat 2>NUL for /F %%# in ('wmic os get localdatetime^|findstr 20') do set end=%%# set end=%end:~4,10% +call :timestamp "%start%" "%end%" "1 card TestCases Total" call :timestamp "%start%" "%end%" "TestCases Total" echo Running unit tests failed, will exit! exit /b 8 @@ -313,6 +315,7 @@ echo ======================================== for /F %%# in ('wmic os get localdatetime^|findstr 20') do set end=%%# set end=%end:~4,10% +call :timestamp "%start%" "%end%" "1 card TestCases Total" call :timestamp "%start%" "%end%" "TestCases Total" cd %work_dir%\paddle\fluid\inference\api\demo_ci @@ -345,6 +348,8 @@ echo ============================================ >> check_change_of_unitte echo EOF>> check_change_of_unittest.sh echo spec_path=$(pwd)/UNITTEST_PR.spec>> check_change_of_unittest.sh echo ctest -N ^| awk -F ':' '{print $2}' ^| sed '/^^$/d' ^| sed '$d' ^> ${spec_path}>> check_change_of_unittest.sh +echo num=$(awk 'END{print NR}' ${spec_path})>> check_change_of_unittest.sh +echo echo "Windows 1 card TestCases count is $num">> check_change_of_unittest.sh echo UPSTREAM_URL='https://github.com/PaddlePaddle/Paddle'>> check_change_of_unittest.sh echo origin_upstream_url=`git remote -v ^| awk '{print $1, $2}' ^| uniq ^| grep upstream ^| awk '{print 
$2}'`>> check_change_of_unittest.sh echo if [ "$origin_upstream_url" == "" ]; then>> check_change_of_unittest.sh @@ -455,8 +460,6 @@ taskkill /f /im cvtres.exe 2>NUL taskkill /f /im rc.exe 2>NUL wmic process where name="op_function_generator.exe" call terminate 2>NUL taskkill /f /im python.exe 2>NUL -call paddle_winci\Scripts\deactivate.bat 2>NUL -del %PADDLE_WHL_FILE_WIN% taskkill /f /im python.exe 2>NUL echo Windows CI run successfully! exit /b 0 -- GitLab From c83ade6d6b0b3ea931af6131990a70243eab3817 Mon Sep 17 00:00:00 2001 From: mapingshuo Date: Fri, 25 Sep 2020 09:39:49 +0800 Subject: [PATCH 083/117] add AsDuplicable for sync_comm op(#27515) --- paddle/fluid/operators/collective/c_sync_comm_stream_op.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc b/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc index d8617492fb1..7e5311a2103 100644 --- a/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc +++ b/paddle/fluid/operators/collective/c_sync_comm_stream_op.cc @@ -54,8 +54,10 @@ class CSyncCommStreamOp : public framework::OperatorBase { class CSyncCommStreamOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() { - AddInput("X", "(Tensor) Dependency of the variable need to sync"); - AddOutput("Out", "(Tensor) Dependency of the variable need to sync"); + AddInput("X", "(Tensor) Dependency of the variable need to sync") + .AsDuplicable(); + AddOutput("Out", "(Tensor) Dependency of the variable need to sync") + .AsDuplicable(); AddAttr("ring_id", "(int default 0) ring id.").SetDefault(0); AddComment(R"DOC( CSyncCommStream Operator -- GitLab From a2e0b7cb4ad01f8ceb48920a721697c7ca92b8c5 Mon Sep 17 00:00:00 2001 From: tianshuo78520a <707759223@qq.com> Date: Fri, 25 Sep 2020 10:02:11 +0800 Subject: [PATCH 084/117] update gcc8 in python3 ci docker (#26979) * update gcc8 in python3 ci docker * change cuda 10.2 * update cudnn8 * nvidia error 
cuda10.2-cudnn8-centos6 images * fix third cache --- cmake/third_party.cmake | 1 + tools/dockerfile/Dockerfile.centos | 6 ++---- tools/dockerfile/ci_dockerfile.sh | 11 ++++++++++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake index 9edfcb967ab..ffd32cc78f0 100644 --- a/cmake/third_party.cmake +++ b/cmake/third_party.cmake @@ -39,6 +39,7 @@ set(third_party_deps) # REPOSITORY ${TARGET_REPOSITORY} # TAG ${TARGET_TAG} # DIR ${TARGET_SOURCE_DIR}) + FUNCTION(cache_third_party TARGET) SET(options "") SET(oneValueArgs URL REPOSITORY TAG DIR) diff --git a/tools/dockerfile/Dockerfile.centos b/tools/dockerfile/Dockerfile.centos index b10e76a4b4d..a50d08354b8 100644 --- a/tools/dockerfile/Dockerfile.centos +++ b/tools/dockerfile/Dockerfile.centos @@ -80,9 +80,7 @@ RUN wget https://paddle-ci.gz.bcebos.com/ccache-3.7.9.tar.gz && \ make -j8 && make install && \ ln -s /usr/local/ccache-3.7.9/bin/ccache /usr/local/bin/ccache -# gcc4.8 TRT -RUN mkdir -p /opt/compiler && cd /opt/compiler && \ - wget -q https://paddle-ci.gz.bcebos.com/gcc-4.8.2.tar.gz && \ - tar xf gcc-4.8.2.tar.gz && rm -f gcc-4.8.2.tar.gz +# Downgrade gcc&&g++ + CMD ["bash", "/paddle/paddle/scripts/docker/build.sh"] diff --git a/tools/dockerfile/ci_dockerfile.sh b/tools/dockerfile/ci_dockerfile.sh index 3716084487e..9c8f8f563ab 100644 --- a/tools/dockerfile/ci_dockerfile.sh +++ b/tools/dockerfile/ci_dockerfile.sh @@ -21,7 +21,7 @@ function make_ubuntu_dockerfile(){ function make_centos_dockerfile(){ dockerfile_name="Dockerfile.cuda9_cudnn7_gcc48_py35_centos6" - sed 's//9.0-cudnn7-devel-centos6/g' Dockerfile.centos >${dockerfile_name} + sed 's//10.2-cudnn7-devel-centos6/g' Dockerfile.centos >${dockerfile_name} sed -i 's#COPY build_scripts /build_scripts#COPY tools/dockerfile/build_scripts ./build_scripts#g' ${dockerfile_name} dockerfile_line=`wc -l ${dockerfile_name}|awk '{print $1}'` sed -i "${dockerfile_line}i RUN ln -s /usr/lib64/libz.so 
/usr/local/lib/libz.so && \ @@ -29,6 +29,15 @@ function make_centos_dockerfile(){ rm -rf /usr/include/NvInfer*" ${dockerfile_name} sed -i "${dockerfile_line}i RUN wget --no-check-certificate -q https://paddle-edl.bj.bcebos.com/hadoop-2.7.7.tar.gz && \ tar -xzf hadoop-2.7.7.tar.gz && mv hadoop-2.7.7 /usr/local/" ${dockerfile_name} + sed -i 's##WORKDIR /usr/bin \ + COPY tools/dockerfile/build_scripts /build_scripts \ + RUN bash /build_scripts/install_gcc.sh gcc82 \&\& rm -rf /build_scripts \ + RUN cp gcc gcc.bak \&\& cp g++ g++.bak \&\& rm gcc \&\& rm g++ \ + RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/local/bin/gcc \ + RUN ln -s /usr/local/gcc-8.2/bin/g++ /usr/local/bin/g++ \ + RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/gcc \ + RUN ln -s /usr/local/gcc-8.2/bin/g++ /usr/bin/g++ \ + ENV PATH=/usr/local/gcc-8.2/bin:$PATH #g' ${dockerfile_name} } -- GitLab From 41a7ce8347dcf154f3f33707d96fc4217b5dbd60 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Fri, 25 Sep 2020 10:09:31 +0800 Subject: [PATCH 085/117] fix random failure of test_buffer_sahred_memory_reuse_pass (#27551) --- .../test_buffer_shared_memory_reuse_pass.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass.py b/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass.py index 2c9168df472..9dd617f90b6 100644 --- a/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass.py +++ b/python/paddle/fluid/tests/unittests/test_buffer_shared_memory_reuse_pass.py @@ -115,8 +115,15 @@ class InplaceTestBase(unittest.TestCase): fetch_val2, = exe.run(compiled_prog, feed=feed_dict, fetch_list=[fetch_var]) - - self.assertTrue(np.array_equal(fetch_val1, fetch_val2)) + #NOTE(zhiqiu): Temporally changed from array_equal to allclose. + # The real root is fuse_all_reduce and fuse_all_optimizer_opss may + # result in diff because of the instruction set on the virtual machine. 
+ # And the related unit tests: test_fuse_all_reduce_pass and test_fuse_optimizer_pass use "almostEqual" in their checks. + # There are also some related issues: + # https://github.com/PaddlePaddle/Paddle/issues/21270 + # https://github.com/PaddlePaddle/Paddle/issues/21046 + # https://github.com/PaddlePaddle/Paddle/issues/21045 + self.assertTrue(np.allclose(fetch_val1, fetch_val2)) def check_multi_card_fetch_var(self): if self.is_invalid_test(): @@ -160,7 +167,8 @@ class InplaceTestBase(unittest.TestCase): fetch_vals.append(fetch_val) for item in fetch_vals: - self.assertTrue(np.array_equal(fetch_vals[0], item)) + # save above + self.assertTrue(np.allclose(fetch_vals[0], item)) class CUDAInplaceTest(InplaceTestBase): -- GitLab From 059bfd69a88be5eaae11f3067e2cbc3b5033ceaa Mon Sep 17 00:00:00 2001 From: chentianyu03 Date: Fri, 25 Sep 2020 10:34:09 +0800 Subject: [PATCH 086/117] change wlist.json, add annotation (#27438) * modify ci api white list, add annotation * modify ci api white list, add annotation * move gpu_not_white list to wlist.json * add null line --- tools/sampcd_processor.py | 23 ++++++++------- tools/wlist.json | 62 ++++++++++++++++++++++++--------------- 2 files changed, 50 insertions(+), 35 deletions(-) diff --git a/tools/sampcd_processor.py b/tools/sampcd_processor.py index 033b4b8723a..d23c18a44e9 100644 --- a/tools/sampcd_processor.py +++ b/tools/sampcd_processor.py @@ -534,13 +534,6 @@ def get_incrementapi(): f.write('\n') -# only white on CPU -gpu_not_white = [ - "deformable_conv", "cuda_places", "CUDAPinnedPlace", "CUDAPlace", - "cuda_profiler", 'DGCMomentumOptimizer' -] - - def get_wlist(): ''' this function will get the white list of API. 
@@ -552,17 +545,25 @@ def get_wlist(): ''' wlist = [] wlist_file = [] + # only white on CPU + gpu_not_white = [] with open("wlist.json", 'r') as load_f: load_dict = json.load(load_f) for key in load_dict: - if key == 'wlist_file': - wlist_file = wlist_file + load_dict[key] + if key == 'wlist_dir': + for item in load_dict[key]: + wlist_file.append(item["name"]) + elif key == "gpu_not_white": + gpu_not_white = load_dict[key] + elif key == "wlist_api": + for item in load_dict[key]: + wlist.append(item["name"]) else: wlist = wlist + load_dict[key] - return wlist, wlist_file + return wlist, wlist_file, gpu_not_white -wlist, wlist_file = get_wlist() +wlist, wlist_file, gpu_not_white = get_wlist() if len(sys.argv) < 2: print("Error: inadequate number of arguments") diff --git a/tools/wlist.json b/tools/wlist.json index 5591f90da4b..0ed0b4e4069 100644 --- a/tools/wlist.json +++ b/tools/wlist.json @@ -1,11 +1,29 @@ { - "wlist_file" : [ - "../python/paddle/fluid/contrib", - "../python/paddle/verison.py", - "../python/paddle/fluid/core_avx.py", - "../python/paddle/distributed" + "wlist_dir":[ + { + "name":"../python/paddle/fluid/contrib", + "annotation":"" + }, + { + "name":"../python/paddle/verison.py", + "annotation":"" + }, + { + "name":"../python/paddle/fluid/core_avx.py", + "annotation":"" + }, + { + "name":"../python/paddle/distributed", + "annotation":"" + } ], - "wlist_inneed":[ + "wlist_api":[ + { + "name":"xxxxx", + "annotation":"not a real api, just for example" + } + ], + "wlist_temp_api":[ "append_LARS", "BuildStrategy.debug_graphviz_path", "BuildStrategy.enable_sequential_execution", @@ -63,9 +81,7 @@ "cuda_places", "CUDAPinnedPlace", "CUDAPlace", - "Program.parse_from_string" - ], - "wlist_nosample":[ + "Program.parse_from_string", "Compressor", "Compressor.config", "Compressor.run", @@ -159,13 +175,9 @@ "RNN", "BiRNN", "RNNCellBase", - "RNNCellBase.get_initial_states" - ], - "wlist_no_op_pass":[ + "RNNCellBase.get_initial_states", "gelu", - "erf" - ], - 
"wlist_ci_nopass":[ + "erf", "DecodeHelper", "DecodeHelper.initialize", "DecodeHelper.sample", @@ -188,9 +200,7 @@ "SampleEmbeddingHelper", "BasicDecoder", "lstm", - "partial_sum" - ], - "wlist_nopass":[ + "partial_sum", "StateCell", "StateCell.compute_state", "TrainingDecoder", @@ -242,9 +252,7 @@ "GroupNorm", "SpectralNorm", "TreeConv", - "prroi_pool" - ], - "wlist_temp":[ + "prroi_pool", "to_tensor", "ChunkEvaluator", "EditDistance", @@ -322,9 +330,7 @@ "Conv2DTranspose", "QueueDataset.local_shuffle", "save_persistables@dygraph/checkpoint.py", - "load_persistables@dygraph/checkpoint.py" - ], - "wlist_ignore":[ + "load_persistables@dygraph/checkpoint.py", "elementwise_pow", "WeightedAverage.reset", "ChunkEvaluator.eval", @@ -401,5 +407,13 @@ "LinearChainCRF.forward", "CRFDecoding.forward", "SequenceTagging.forward" + ], + "gpu_not_white":[ + "deformable_conv", + "cuda_places", + "CUDAPinnedPlace", + "CUDAPlace", + "cuda_profiler", + "DGCMomentumOptimizer" ] } -- GitLab From c5c13473c65520c0439eb009e2e709cca0c4a249 Mon Sep 17 00:00:00 2001 From: cc <52520497+juncaipeng@users.noreply.github.com> Date: Fri, 25 Sep 2020 10:45:18 +0800 Subject: [PATCH 087/117] Add compatibility check for four mkldnn pass (#27364) * Add pass compatibility check for four mkldnn pass, test=develop --- .../conv_activation_mkldnn_fuse_pass.cc | 21 ++++ .../conv_concat_relu_mkldnn_fuse_pass.cc | 8 ++ .../matmul_transpose_reshape_fuse_pass.cc | 8 ++ .../ir/mkldnn/scale_matmul_fuse_pass.cc | 7 ++ .../test_mkldnn_conv_activation_fuse_pass.py | 106 ++++++++++++++++++ ...kldnn_conv_concat_relu_mkldnn_fuse_pass.py | 92 +++++++++++++++ ...ldnn_matmul_transpose_reshape_fuse_pass.py | 81 +++++++++++++ ... 
=> test_mkldnn_scale_matmul_fuse_pass.py} | 40 ++++++- 8 files changed, 357 insertions(+), 6 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_activation_fuse_pass.py create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_concat_relu_mkldnn_fuse_pass.py create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_transpose_reshape_fuse_pass.py rename python/paddle/fluid/tests/unittests/ir/inference/{test_mkldnn_conv_relu_fuse_pass.py => test_mkldnn_scale_matmul_fuse_pass.py} (50%) diff --git a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc index a5beec87c39..c33398553ec 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h" #include +#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" namespace paddle { @@ -103,12 +104,32 @@ REGISTER_PASS(conv_activation_mkldnn_fuse_pass, REGISTER_PASS(conv_relu_mkldnn_fuse_pass, paddle::framework::ir::ConvActivationFusePass); +REGISTER_PASS_CAPABILITY(conv_relu_mkldnn_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("conv2d", 0) + .EQ("relu", 0)); REGISTER_PASS(conv_leaky_relu_mkldnn_fuse_pass, paddle::framework::ir::Conv2DLeakyReLUFusePass); +REGISTER_PASS_CAPABILITY(conv_leaky_relu_mkldnn_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("conv2d", 0) + .LE("leaky_relu", 1)); REGISTER_PASS(conv_relu6_mkldnn_fuse_pass, paddle::framework::ir::Conv2DReLU6FusePass); +REGISTER_PASS_CAPABILITY(conv_relu6_mkldnn_fuse_pass) + .AddCombination( + 
paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("conv2d", 0) + .EQ("relu6", 0)); REGISTER_PASS(conv_swish_mkldnn_fuse_pass, paddle::framework::ir::Conv2DSwishFusePass); +REGISTER_PASS_CAPABILITY(conv_swish_mkldnn_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("conv2d", 0) + .EQ("swish", 0)); diff --git a/paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.cc index 5fadd9607e9..76e10212550 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.h" #include +#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" namespace paddle { @@ -123,3 +124,10 @@ void ConvConcatReLUFusePass::ApplyImpl(ir::Graph* graph) const { REGISTER_PASS(conv_concat_relu_mkldnn_fuse_pass, paddle::framework::ir::ConvConcatReLUFusePass); + +REGISTER_PASS_CAPABILITY(conv_concat_relu_mkldnn_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("conv2d", 0) + .EQ("concat", 0) + .EQ("relu", 0)); diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_fuse_pass.cc index 37c14e1d8e3..41b859f0af6 100644 --- a/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_fuse_pass.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_fuse_pass.h" #include #include +#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" namespace paddle { @@ -98,3 +99,10 @@ void MatmulTransposeReshapeMKLDNNPass::ApplyImpl(ir::Graph *graph) 
const { REGISTER_PASS(matmul_transpose_reshape_fuse_pass, paddle::framework::ir::MatmulTransposeReshapeMKLDNNPass); + +REGISTER_PASS_CAPABILITY(matmul_transpose_reshape_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("matmul", 0) + .EQ("transpose", 0) + .EQ("reshape", 0)); diff --git a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc index 790821e3fa4..0784a1a024c 100644 --- a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/string/pretty_log.h" namespace paddle { @@ -90,3 +91,9 @@ void ScaleMatmulFusePass::ApplyImpl(ir::Graph* graph) const { REGISTER_PASS(scale_matmul_fuse_pass, paddle::framework::ir::ScaleMatmulFusePass); + +REGISTER_PASS_CAPABILITY(scale_matmul_fuse_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("scale", 0) + .EQ("matmul", 0)); diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_activation_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_activation_fuse_pass.py new file mode 100644 index 00000000000..5d96994a33b --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_activation_fuse_pass.py @@ -0,0 +1,106 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import AnalysisConfig +from paddle.fluid.core import PassVersionChecker + + +class ConvActivationMkldnnFusePassTest(InferencePassTest): + def setUp(self): + self.set_params() + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 3, 100, 100], dtype="float32") + conv_out = fluid.layers.conv2d( + data, + num_filters=self.conv_num_filters, + filter_size=self.conv_filter_size, + bias_attr=self.conv_bias_attr, + act=self.act) + + self.feeds = { + "data": np.random.random((1, 3, 100, 100)).astype("float32") + } + self.fetch_list = [conv_out] + self.enable_mkldnn = True + + def set_params(self): + self.conv_num_filters = 3 + self.conv_filter_size = 3 + self.conv_bias_attr = False + self.act = "relu" + self.pass_name = 'conv_relu_mkldnn_fuse_pass' + + def test_check_output(self): + use_gpu = False + self.check_output_with_option(use_gpu) + + def test_pass_compatible(self): + self.assertTrue(PassVersionChecker.IsCompatible(self.pass_name)) + + +class ConvActivationMkldnnFusePassTest_1(ConvActivationMkldnnFusePassTest): + def set_params(self): + self.conv_num_filters = 5 + self.conv_filter_size = 5 + self.conv_bias_attr = True + self.act = "relu" + self.pass_name = 'conv_relu_mkldnn_fuse_pass' + + +class ConvActivationMkldnnFusePassTest_2(ConvActivationMkldnnFusePassTest): + def 
set_params(self): + self.conv_num_filters = 3 + self.conv_filter_size = 3 + self.conv_bias_attr = False + self.act = "leaky_relu" + self.pass_name = 'conv_leaky_relu_mkldnn_fuse_pass' + + +class ConvActivationMkldnnFusePassTest_3(ConvActivationMkldnnFusePassTest): + def set_params(self): + self.conv_num_filters = 5 + self.conv_filter_size = 5 + self.conv_bias_attr = True + self.act = "leaky_relu" + self.pass_name = 'conv_leaky_relu_mkldnn_fuse_pass' + + +class ConvActivationMkldnnFusePassTest_4(ConvActivationMkldnnFusePassTest): + def set_params(self): + self.conv_num_filters = 3 + self.conv_filter_size = 3 + self.conv_bias_attr = False + self.act = "relu6" + self.pass_name = 'conv_relu6_mkldnn_fuse_pass' + + +class ConvActivationMkldnnFusePassTest_5(ConvActivationMkldnnFusePassTest): + def set_params(self): + self.conv_num_filters = 5 + self.conv_filter_size = 5 + self.conv_bias_attr = True + self.act = "swish" + self.pass_name = 'conv_swish_mkldnn_fuse_pass' + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_concat_relu_mkldnn_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_concat_relu_mkldnn_fuse_pass.py new file mode 100644 index 00000000000..45097f6b819 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_concat_relu_mkldnn_fuse_pass.py @@ -0,0 +1,92 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import AnalysisConfig +from paddle.fluid.core import PassVersionChecker + + +class ConvConcatReluMkldnnFusePassTest_0(InferencePassTest): + def setUp(self): + self.set_params() + with fluid.program_guard(self.main_program, self.startup_program): + data_1 = fluid.data( + name="data_1", shape=[-1, 3, 100, 100], dtype="float32") + data_2 = fluid.data( + name="data_2", shape=[-1, 3, 100, 100], dtype="float32") + conv_1 = fluid.layers.conv2d( + data_1, + num_filters=self.conv1_num_filters, + filter_size=self.conv1_filter_size, + padding=self.conv1_padding, + bias_attr=self.conv1_bias_attr) + conv_2 = fluid.layers.conv2d( + data_2, + num_filters=self.conv2_num_filters, + filter_size=self.conv2_filter_size, + padding=self.conv2_padding, + bias_attr=self.conv2_bias_attr) + concat = fluid.layers.concat( + [conv_1, conv_2], axis=self.concat_axis) + out = fluid.layers.relu(concat) + + self.feeds = { + "data_1": np.random.random((1, 3, 100, 100)).astype("float32"), + "data_2": np.random.random((1, 3, 100, 100)).astype("float32") + } + self.fetch_list = [out] + self.enable_mkldnn = True + + def set_params(self): + self.conv1_num_filters = 3 + self.conv1_filter_size = 3 + self.conv1_padding = 0 + self.conv1_bias_attr = False + self.conv2_num_filters = 3 + self.conv2_filter_size = 3 + self.conv2_padding = 0 + self.conv2_bias_attr = False + self.concat_axis = 0 + self.pass_name = "conv_concat_relu_mkldnn_fuse_pass" + + def test_check_output(self): + use_gpu = False + self.check_output_with_option(use_gpu) + + def test_pass_compatible(self): + self.assertTrue(PassVersionChecker.IsCompatible(self.pass_name)) + + +class 
ConvConcatReluMkldnnFusePassTest_1(ConvConcatReluMkldnnFusePassTest_0): + def set_params(self): + self.conv1_num_filters = 3 + self.conv1_filter_size = 3 + self.conv1_padding = 0 + self.conv1_bias_attr = False + self.conv2_num_filters = 5 + self.conv2_filter_size = 5 + self.conv2_padding = 1 + self.conv2_bias_attr = True + self.concat_axis = 1 + self.pass_name = "conv_concat_relu_mkldnn_fuse_pass" + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_transpose_reshape_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_transpose_reshape_fuse_pass.py new file mode 100644 index 00000000000..a6b5e0e5473 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_matmul_transpose_reshape_fuse_pass.py @@ -0,0 +1,81 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import AnalysisConfig +from paddle.fluid.core import PassVersionChecker + + +class MatmulTransposeReshapeMkldnnFusePassTest(InferencePassTest): + def setUp(self): + self.set_params() + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=self.data_shape, dtype="float32") + weight = fluid.layers.create_parameter( + shape=self.weight_shape, dtype="float32") + matmul = fluid.layers.matmul( + data, + weight, + transpose_x=self.transpose_x, + transpose_y=self.transpose_y) + transpose = fluid.layers.transpose(matmul, self.tranpose_perm) + reshape = fluid.layers.reshape(transpose, shape=self.reshape_shape) + + self.fetch_list = [reshape] + self.enable_mkldnn = True + + def set_params(self): + self.data_shape = [-1, 3, 100, 110] + self.weight_shape = [1, 3, 110, 100] + self.feeds = { + "data": np.random.random((1, 3, 100, 110)).astype("float32") + } + self.transpose_x = False + self.transpose_y = False + self.tranpose_perm = [0, 2, 1, 3] + self.reshape_shape = [3, 100, 100] + self.pass_name = 'matmul_transpose_reshape_fuse_pass' + + def test_check_output(self): + use_gpu = False + self.check_output_with_option(use_gpu) + + def test_pass_compatible(self): + self.assertTrue(PassVersionChecker.IsCompatible(self.pass_name)) + + +class MatmulTransposeReshapeMkldnnFusePassTest_1( + MatmulTransposeReshapeMkldnnFusePassTest): + def set_params(self): + self.data_shape = [-1, 3, 100, 100] + self.weight_shape = [1, 3, 100, 100] + self.feeds = { + "data": np.random.random((1, 3, 100, 100)).astype("float32") + } + self.transpose_x = True + self.transpose_y = True + self.tranpose_perm = [0, 2, 1, 3] + self.reshape_shape = [6, 50, 100] + self.pass_name = 'matmul_transpose_reshape_fuse_pass' + + +if __name__ == 
"__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_relu_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_scale_matmul_fuse_pass.py similarity index 50% rename from python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_relu_fuse_pass.py rename to python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_scale_matmul_fuse_pass.py index 2346e93d64d..55a6b543f0a 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_relu_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_scale_matmul_fuse_pass.py @@ -20,26 +20,54 @@ from inference_pass_test import InferencePassTest import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.core import AnalysisConfig +from paddle.fluid.core import PassVersionChecker -class ConvBnFusePassMKLDNNTest(InferencePassTest): +class ScaleMatmulMkldnnFusePassTest(InferencePassTest): def setUp(self): + self.set_params() with fluid.program_guard(self.main_program, self.startup_program): data = fluid.data( - name="data", shape=[-1, 3, 100, 100], dtype="float32") - conv_out = fluid.layers.conv2d( - data, num_filters=3, filter_size=3, bias_attr=False, act="relu") + name="data", shape=[1, 3, 100, 100], dtype="float32") + weight = fluid.layers.create_parameter( + shape=[1, 3, 100, 100], dtype="float32") + scale = fluid.layers.scale(data, scale=self.scale_scale) + matmul = fluid.layers.matmul( + scale, + weight, + transpose_x=self.transpose_x, + transpose_y=self.transpose_y) + self.fetch_list = [matmul] + self.enable_mkldnn = True + + def set_params(self): self.feeds = { "data": np.random.random((1, 3, 100, 100)).astype("float32") } - self.fetch_list = [conv_out] - self.enable_mkldnn = True + self.scale_scale = 2.0 + self.transpose_x = False + self.transpose_y = False + self.pass_name = "scale_matmul_fuse_pass" def test_check_output(self): use_gpu = False 
self.check_output_with_option(use_gpu) + def test_pass_compatible(self): + self.assertTrue(PassVersionChecker.IsCompatible(self.pass_name)) + + +class ScaleMatmulMkldnnFusePassTest_1(ScaleMatmulMkldnnFusePassTest): + def set_params(self): + self.feeds = { + "data": np.random.random((1, 3, 100, 100)).astype("float32") + } + self.scale_scale = 5.0 + self.transpose_x = True + self.transpose_y = True + self.pass_name = "scale_matmul_fuse_pass" + if __name__ == "__main__": unittest.main() -- GitLab From e550fc02ae93450e9acb6c238f55733dca269c61 Mon Sep 17 00:00:00 2001 From: WangXi Date: Fri, 25 Sep 2020 10:58:17 +0800 Subject: [PATCH 088/117] fleet2.0 add fp16 grad compression (#27480) --- .../framework/distributed_strategy.proto | 1 + .../fleet/base/distributed_strategy.py | 23 +++ .../fleet/meta_optimizers/__init__.py | 1 + .../fp16_allreduce_optimizer.py | 146 ++++++++++++++++++ .../fluid/tests/unittests/CMakeLists.txt | 2 + .../unittests/dist_mnist_fp16_allreduce.py | 63 ++++++++ .../test_dist_mnist_fp16_allreduce.py | 33 ++++ .../test_fleet_distributed_strategy.py | 10 ++ ...est_fleet_fp16_allreduce_meta_optimizer.py | 91 +++++++++++ 9 files changed, 370 insertions(+) create mode 100755 python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py create mode 100644 python/paddle/fluid/tests/unittests/dist_mnist_fp16_allreduce.py create mode 100644 python/paddle/fluid/tests/unittests/test_dist_mnist_fp16_allreduce.py create mode 100644 python/paddle/fluid/tests/unittests/test_fleet_fp16_allreduce_meta_optimizer.py diff --git a/paddle/fluid/framework/distributed_strategy.proto b/paddle/fluid/framework/distributed_strategy.proto index df482f43346..c9ae5a67950 100644 --- a/paddle/fluid/framework/distributed_strategy.proto +++ b/paddle/fluid/framework/distributed_strategy.proto @@ -127,6 +127,7 @@ message DistributedStrategy { optional int32 conv_workspace_size_limit = 22 [ default = 4000 ]; optional bool cudnn_batchnorm_spatial_persistent = 23 [ default 
= true ]; optional bool adaptive_localsgd = 24 [ default = false ]; + optional bool fp16_allreduce = 25 [ default = false ]; optional RecomputeConfig recompute_configs = 101; optional AMPConfig amp_configs = 102; diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py index f1c836468da..316b6494e34 100755 --- a/python/paddle/distributed/fleet/base/distributed_strategy.py +++ b/python/paddle/distributed/fleet/base/distributed_strategy.py @@ -845,6 +845,29 @@ class DistributedStrategy(object): check_configs_key(self.strategy.dgc_configs, configs, "dgc_configs") assign_configs_value(self.strategy.dgc_configs, configs) + @property + def fp16_allreduce(self): + """ + Indicating whether we are using fp16 gradient allreduce training + Default Value: False + + Examples: + .. code-block:: python + + import paddle.distributed.fleet as fleet + strategy = fleet.DistributedStrategy() + strategy.fp16_allreduce = True # by default this is false + + """ + return self.strategy.fp16_allreduce + + @fp16_allreduce.setter + @is_strict_auto + def fp16_allreduce(self, flag): + if not isinstance(flag, bool): + raise TypeError('fp16_allreduce must be value of bool type') + self.strategy.fp16_allreduce = flag + @property def gradient_merge(self): """ diff --git a/python/paddle/distributed/fleet/meta_optimizers/__init__.py b/python/paddle/distributed/fleet/meta_optimizers/__init__.py index a3a2dee7038..2e63e82e630 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/__init__.py +++ b/python/paddle/distributed/fleet/meta_optimizers/__init__.py @@ -23,3 +23,4 @@ from .lars_optimizer import LarsOptimizer from .parameter_server_graph_optimizer import ParameterServerGraphOptimizer from .dgc_optimizer import DGCOptimizer from .lamb_optimizer import LambOptimizer +from .fp16_allreduce_optimizer import FP16AllReduceOptimizer diff --git a/python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py 
b/python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py new file mode 100755 index 00000000000..411980ed013 --- /dev/null +++ b/python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py @@ -0,0 +1,146 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + +from paddle.fluid import core, framework, unique_name +from .meta_optimizer_base import MetaOptimizerBase + + +class FP16AllReduceOptimizer(MetaOptimizerBase): + def __init__(self, optimizer): + super(FP16AllReduceOptimizer, self).__init__(optimizer) + self.inner_opt = optimizer + # we do not allow meta optimizer to be inner optimizer currently + self.meta_optimizers_white_list = [ + "LarsOptimizer", + "LambOptimizer", + "RecomputeOptimizer", + "LocalSGDOptimizer", + "GradientMergeOptimizer", + "GraphExecutionOptimizer", + "AdaptiveLocalSGDOptimizer", + ] + self.meta_optimizers_black_list = ["DGCOptimizer"] + + def _set_basic_info(self, loss, role_maker, user_defined_optimizer, + user_defined_strategy): + super(FP16AllReduceOptimizer, self)._set_basic_info( + loss, role_maker, user_defined_optimizer, user_defined_strategy) + + def _can_apply(self): + if not self.role_maker._is_collective: + return False + + if self.user_defined_strategy.fp16_allreduce: + return True + + return False + + def _disable_strategy(self, dist_strategy): + dist_strategy.fp16_allreduce = False + + def _enable_strategy(self, dist_strategy, context=None): + 
dist_strategy.fp16_allreduce = True + + @staticmethod + def fp16_compression(param_and_grads): + """ + Compress fp32 gradients to fp16 during allreduce. + """ + op_maker = core.op_proto_and_checker_maker + + new_param_and_grads = [] # param, grad, is_cast + # cast grad from fp32->fp16 before allreduce, + for param, grad in param_and_grads: + if grad is None or grad.dtype != core.VarDesc.VarType.FP32: + new_param_and_grads.append((param, grad, False)) + continue + + op = grad.op + block = grad.block + var_attr = op.all_attrs()[op_maker.kOpRoleVarAttrName()] + if param.name not in var_attr: + new_param_and_grads.append((param, grad, False)) + continue + + # remove (param, grad) from op_role_var + var_attr.remove(param.name) + var_attr.remove(grad.name) + if len(var_attr) > 1: + op._set_attr(op_maker.kOpRoleVarAttrName(), var_attr) + else: + op._remove_attr(op_maker.kOpRoleVarAttrName()) + + new_grad = block.create_var( + name=unique_name.generate(grad.name + ".cast_fp16"), + dtype=core.VarDesc.VarType.FP16, + persistable=False, + stop_gradient=True) + + with block.program._backward_role_guard(): + cast_op = block.append_op( + type="cast", + inputs={"X": grad}, + outputs={"Out": new_grad}, + attrs={ + "in_dtype": core.VarDesc.VarType.FP32, + "out_dtype": core.VarDesc.VarType.FP16 + }, + stop_gradient=True) + + backward = op_maker.OpRole.Backward + cast_op._set_attr(op_maker.kOpRoleAttrName(), backward) + cast_op._set_attr(op_maker.kOpRoleVarAttrName(), + [param.name, new_grad.name]) + new_grad.op = cast_op + + new_param_and_grads.append((param, new_grad, True)) + + ret_param_and_grads = [] + # cast grad from fp16->fp32 after allreduce. + # NOTE. Now we split fp16 compression into two for loops, + # if we do not separate them, fuse allreduce will wrong. + # This must be the problem of fuse allreduce pass, need + # fixed in future. 
+ for param, grad, cast in new_param_and_grads: + if not cast: + ret_param_and_grads.append((param, grad)) + continue + + block = grad.block + new_grad = block.create_var( + name=unique_name.generate(grad.name + ".cast_fp32"), + dtype=core.VarDesc.VarType.FP32, + persistable=False, + stop_gradient=True) + + with block.program._optimized_guard( + [param, grad]), framework.name_scope('fp16_allreduce'): + cast_op = block.append_op( + type="cast", + inputs={"X": grad}, + outputs={"Out": new_grad}, + attrs={ + "in_dtype": core.VarDesc.VarType.FP16, + "out_dtype": core.VarDesc.VarType.FP32 + }, + stop_gradient=True) + ret_param_and_grads.append((param, new_grad)) + + return ret_param_and_grads + + def apply_optimize(self, loss, startup_program, params_grads): + new_params_grads = self.fp16_compression(params_grads) + return self.inner_opt.apply_optimize( + loss, + startup_program=startup_program, + params_grads=new_params_grads) diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 94bc6235ad1..2f8952a4431 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -45,6 +45,7 @@ list(APPEND MIXED_DIST_TEST_OPS test_fleet_localsgd_meta_optimizer) list(APPEND MIXED_DIST_TEST_OPS test_fleet_lars_meta_optimizer) list(APPEND MIXED_DIST_TEST_OPS test_fleet_lamb_meta_optimizer) list(APPEND MIXED_DIST_TEST_OPS test_fleet_dgc_meta_optimizer) +list(APPEND MIXED_DIST_TEST_OPS test_fleet_fp16_allreduce_meta_optimizer) list(APPEND MIXED_DIST_TEST_OPS test_fleet_private_function) list(APPEND MIXED_DIST_TEST_OPS test_fleet_graph_executor) list(APPEND MIXED_DIST_TEST_OPS test_fleet_meta_optimizer_base) @@ -458,6 +459,7 @@ if(WITH_DISTRIBUTE) py_test_modules(test_fleet_graph_executor MODULES test_fleet_graph_executor ENVS ${dist_ENVS}) py_test_modules(test_fleet_gradient_merge_meta_optimizer MODULES test_fleet_gradient_merge_meta_optimizer ENVS 
${dist_ENVS}) py_test_modules(test_fleet_amp_meta_optimizer MODULES test_fleet_amp_meta_optimizer ENVS ${dist_ENVS}) + py_test_modules(test_fleet_fp16_allreduce_meta_optimizer MODULES test_fleet_fp16_allreduce_meta_optimizer ENVS ${dist_ENVS}) py_test_modules(test_fleet_pipeline_meta_optimizer MODULES test_fleet_pipeline_meta_optimizer ENVS ${dist_ENVS}) py_test_modules(test_fleet_private_function MODULES test_fleet_private_function ENVS ${dist_ENVS}) py_test_modules(test_fleet_meta_optimizer_base MODULES test_fleet_meta_optimizer_base ENVS ${dist_ENVS}) diff --git a/python/paddle/fluid/tests/unittests/dist_mnist_fp16_allreduce.py b/python/paddle/fluid/tests/unittests/dist_mnist_fp16_allreduce.py new file mode 100644 index 00000000000..3198c6cac86 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/dist_mnist_fp16_allreduce.py @@ -0,0 +1,63 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +from paddle.distributed.fleet.meta_optimizers import FP16AllReduceOptimizer as FP16AllReduce +from test_dist_base import TestDistRunnerBase, runtime_main +from dist_mnist import cnn_model + +DTYPE = "float32" +paddle.dataset.mnist.fetch() + +# Fix seed for test +fluid.default_startup_program().random_seed = 1 +fluid.default_main_program().random_seed = 1 + + +class TestDistMnist2x2(TestDistRunnerBase): + def get_model(self, batch_size=2): + # Input data + images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + + # Train program + predict = cnn_model(images) + cost = fluid.layers.cross_entropy(input=predict, label=label) + avg_cost = fluid.layers.mean(x=cost) + + # Evaluator + batch_size_tensor = fluid.layers.create_tensor(dtype='int64') + batch_acc = fluid.layers.accuracy( + input=predict, label=label, total=batch_size_tensor) + + inference_program = fluid.default_main_program().clone() + # Optimization + opt = fluid.optimizer.MomentumOptimizer( + learning_rate=0.001, momentum=0.9) + opt = FP16AllReduce(opt) + + # Reader + train_reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=batch_size) + test_reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=batch_size) + opt.minimize(avg_cost) + return inference_program, avg_cost, train_reader, test_reader, batch_acc, predict + + +if __name__ == "__main__": + runtime_main(TestDistMnist2x2) diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist_fp16_allreduce.py b/python/paddle/fluid/tests/unittests/test_dist_mnist_fp16_allreduce.py new file mode 100644 index 00000000000..d74d08681c1 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist_fp16_allreduce.py @@ -0,0 +1,33 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import unittest +from test_dist_base import TestDistBase + + +class TestDistMnist2x2FP16AllReduce(TestDistBase): + def _setup_config(self): + self._sync_mode = True + self._use_reduce = False + self._nccl2_mode = True + + def test_dist_train(self): + import paddle.fluid as fluid + if fluid.core.is_compiled_with_cuda(): + self.check_with_place("dist_mnist_fp16_allreduce.py", delta=1e-5) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_distributed_strategy.py b/python/paddle/fluid/tests/unittests/test_fleet_distributed_strategy.py index b20f33e11b6..deaf342da12 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_distributed_strategy.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_distributed_strategy.py @@ -102,6 +102,16 @@ class TestStrategyConfig(unittest.TestCase): strategy.dgc = "True" self.assertEqual(strategy.dgc, False) + def test_fp16_allreduce(self): + strategy = paddle.distributed.fleet.DistributedStrategy() + strategy.fp16_allreduce = True + self.assertEqual(strategy.fp16_allreduce, True) + strategy.fp16_allreduce = False + self.assertEqual(strategy.fp16_allreduce, False) + with self.assertRaises(TypeError): + strategy.fp16_allreduce = "True" + self.assertEqual(strategy.fp16_allreduce, False) + def test_sync_nccl_allreduce(self): strategy = paddle.distributed.fleet.DistributedStrategy() 
strategy.sync_nccl_allreduce = True diff --git a/python/paddle/fluid/tests/unittests/test_fleet_fp16_allreduce_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_fp16_allreduce_meta_optimizer.py new file mode 100644 index 00000000000..efffa9fa88f --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_fleet_fp16_allreduce_meta_optimizer.py @@ -0,0 +1,91 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.distributed.fleet as fleet +import paddle.distributed.fleet.base.role_maker as role_maker +import unittest +import paddle +import paddle.fluid as fluid +import os + +paddle.enable_static() + + +class TestFleetFP16CompressOptimizer(unittest.TestCase): + def setUp(self): + os.environ["PADDLE_TRAINER_ID"] = "0" + os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36001" + + def net(self, main_prog, startup_prog, dtype='float32'): + with fluid.program_guard(main_prog, startup_prog): + input_x = paddle.fluid.layers.data( + name="x", shape=[32], dtype=dtype) + input_y = paddle.fluid.layers.data( + name="y", shape=[1], dtype='int64') + + fc_1 = paddle.fluid.layers.fc(input=input_x, size=64, act='tanh') + fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh') + prediction = paddle.fluid.layers.fc(input=[fc_2], + size=2, + act='softmax') + cost = paddle.fluid.layers.cross_entropy( + input=prediction, label=input_y) + avg_cost = paddle.fluid.layers.mean(x=cost) + + 
strategy = paddle.distributed.fleet.DistributedStrategy() + strategy.fp16_allreduce = True + return avg_cost, strategy + + def test_fp16_allreduce_optimizer(self): + role = role_maker.PaddleCloudRoleMaker(is_collective=True) + fleet.init(role) + train_prog, startup_prog = fluid.Program(), fluid.Program() + avg_cost, strategy = self.net(train_prog, startup_prog) + + optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) + optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) + optimizer.minimize(avg_cost) + + ops = [op.type for op in avg_cost.block.ops] + cast_out = [ + op.output('Out')[0] for op in avg_cost.block.ops + if op.type == 'cast' + ] + + cast_op_count = 0 + for name in ops: + if name == 'cast': + cast_op_count += 1 + self.assertIn('cast', ops) + self.assertEqual(cast_op_count, 12) # 6 + 6, cast_fp16 + cast_fp32 + + for name in cast_out: + self.assertIn('cast_fp16', name) + + def test_fp16_allreduce_not_apply_fp16_net(self): + role = role_maker.PaddleCloudRoleMaker(is_collective=True) + fleet.init(role) + train_prog, startup_prog = fluid.Program(), fluid.Program() + avg_cost, strategy = self.net(train_prog, startup_prog, dtype='float16') + + optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) + optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) + optimizer.minimize(avg_cost) + + ops = [op.type for op in avg_cost.block.ops] + self.assertNotIn('cast', ops) + + +if __name__ == "__main__": + unittest.main() -- GitLab From dd04b160d9492969715f3dc5caff5e73ab321b27 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Fri, 25 Sep 2020 11:32:28 +0800 Subject: [PATCH 089/117] fix test imperative se resnext failed (#27538) --- .../unittests/test_imperative_se_resnext.py | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py index a04e1e4e5aa..e47a70054be 100644 
--- a/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_se_resnext.py @@ -25,6 +25,9 @@ from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear from paddle.fluid.dygraph.base import to_variable from test_imperative_base import new_program_scope +if fluid.is_compiled_with_cuda(): + fluid.set_flags({'FLAGS_cudnn_deterministic': True}) + batch_size = 8 train_parameters = { "input_size": [3, 224, 224], @@ -340,7 +343,9 @@ class TestImperativeResneXt(unittest.TestCase): label.stop_gradient = True out = se_resnext(img) - loss = fluid.layers.cross_entropy(input=out, label=label) + softmax_out = fluid.layers.softmax(out, use_cudnn=False) + loss = fluid.layers.cross_entropy( + input=softmax_out, label=label) avg_loss = fluid.layers.mean(x=loss) dy_out = avg_loss.numpy() @@ -386,7 +391,8 @@ class TestImperativeResneXt(unittest.TestCase): name='pixel', shape=[3, 224, 224], dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') out = se_resnext(img) - loss = fluid.layers.cross_entropy(input=out, label=label) + softmax_out = fluid.layers.softmax(out, use_cudnn=False) + loss = fluid.layers.cross_entropy(input=softmax_out, label=label) avg_loss = fluid.layers.mean(x=loss) optimizer.minimize(avg_loss) @@ -443,7 +449,9 @@ class TestImperativeResneXt(unittest.TestCase): static_grad_value[static_grad_name_list[ i - grad_start_pos]] = out[i] - self.assertTrue(np.allclose(static_out, dy_out)) + self.assertTrue( + np.allclose(static_out, dy_out), + "\nstatic_out: {}\ndy_out: {}".format(static_out, dy_out)) self.assertEqual(len(dy_param_init_value), len(static_param_init_value)) @@ -455,16 +463,23 @@ class TestImperativeResneXt(unittest.TestCase): self.assertEqual(len(dy_grad_value), len(static_grad_value)) for key, value in six.iteritems(static_grad_value): - self.assertTrue(np.allclose(value, dy_grad_value[key])) + self.assertTrue( + np.allclose(value, 
dy_grad_value[key]), + "\nstatic_grad_value: {}\ndy_grad_value: {}".format( + value, dy_grad_value[key])) self.assertTrue(np.isfinite(value.all())) self.assertFalse(np.isnan(value.any())) self.assertEqual(len(dy_param_value), len(static_param_value)) for key, value in six.iteritems(static_param_value): - self.assertTrue(np.allclose(value, dy_param_value[key])) + self.assertTrue( + np.allclose(value, dy_param_value[key]), + "\nstatic_param_value: {}\ndy_param_value: {}".format( + value, dy_param_value[key])) self.assertTrue(np.isfinite(value.all())) self.assertFalse(np.isnan(value.any())) if __name__ == '__main__': + paddle.enable_static() unittest.main() -- GitLab From 597345d17b5f6252878bfb0f62133f77f485d7ef Mon Sep 17 00:00:00 2001 From: Zhong Hui Date: Fri, 25 Sep 2020 11:58:53 +0800 Subject: [PATCH 090/117] fix cuda atomic for ARCH<350 for the automic_max fix cuda atomic for ARCH<350 for the automic_max --- paddle/fluid/platform/cuda_primitives.h | 38 +++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/paddle/fluid/platform/cuda_primitives.h b/paddle/fluid/platform/cuda_primitives.h index f7c77071b12..a5dd19d4363 100644 --- a/paddle/fluid/platform/cuda_primitives.h +++ b/paddle/fluid/platform/cuda_primitives.h @@ -134,7 +134,26 @@ USE_CUDA_ATOMIC(Max, int); USE_CUDA_ATOMIC(Max, unsigned int); // CUDA API uses unsigned long long int, we cannot use uint64_t here. // It because unsigned long long int is not necessarily uint64_t +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 USE_CUDA_ATOMIC(Max, unsigned long long int); // NOLINT +#else +CUDA_ATOMIC_WRAPPER(Max, unsigned long long int) { + if (*address >= val) { + return; + } + + unsigned long long int old = *address, assumed; + + do { + assumed = old; + if (assumed >= val) { + break; + } + + old = atomicCAS(address, assumed, val); + } while (assumed != old); +} +#endif CUDA_ATOMIC_WRAPPER(Max, int64_t) { // Here, we check long long int must be int64_t. 
@@ -187,7 +206,26 @@ USE_CUDA_ATOMIC(Min, int); USE_CUDA_ATOMIC(Min, unsigned int); // CUDA API uses unsigned long long int, we cannot use uint64_t here. // It because unsigned long long int is not necessarily uint64_t +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 USE_CUDA_ATOMIC(Min, unsigned long long int); // NOLINT +#else +CUDA_ATOMIC_WRAPPER(Min, unsigned long long int) { + if (*address <= val) { + return; + } + + unsigned long long int old = *address, assumed; + + do { + assumed = old; + if (assumed <= val) { + break; + } + + old = atomicCAS(address, assumed, val); + } while (assumed != old); +} +#endif CUDA_ATOMIC_WRAPPER(Min, int64_t) { // Here, we check long long int must be int64_t. -- GitLab From 6e16a0997c7017f3167fe6b672d92c4e5bfefc42 Mon Sep 17 00:00:00 2001 From: zhang wenhui Date: Fri, 25 Sep 2020 13:05:22 +0800 Subject: [PATCH 091/117] fix unittest_group_norm_op_v2, test=develop (#27486) * fix unittest_group_norm_op_v2, test=develop * fix unittest_group_norm_op_v2, test=develop --- python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py index a46b9b0ca78..833eeb33641 100644 --- a/python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py @@ -56,7 +56,10 @@ class TestDygraphGroupNormv2(unittest.TestCase): x = np.random.randn(*shape).astype("float32") y1 = compute_v1(x) y2 = compute_v2(x) - self.assertTrue(np.allclose(y1, y2)) + result = np.allclose(y1, y2) + if not result: + print("y1:", y1, "\ty2:", y2) + self.assertTrue(result) test_weight_bias_false() def test_static(self): -- GitLab From 77a36f8997e0c2297144a871d04331e9c9478896 Mon Sep 17 00:00:00 2001 From: Shang Zhizhou Date: Fri, 25 Sep 2020 13:27:40 +0800 Subject: [PATCH 092/117] [buf fix]:fix some unittests error (#27540) 
* [buf fix]:fix unittest test_activation_op error * split long-time unittests to smaller ones * rename some unittests --- .../fluid/inference/tests/api/CMakeLists.txt | 22 +++++- ...yzer_seq_pool1_compare_determine_tester.cc | 40 ++++++++++ .../api/analyzer_seq_pool1_compare_tester.cc | 39 +++++++++ ...seq_pool1_fuse_compare_zero_copy_tester.cc | 46 +++++++++++ .../analyzer_seq_pool1_fuse_statis_tester.cc | 48 +++++++++++ .../api/analyzer_seq_pool1_profile_tester.cc | 42 ++++++++++ ....cc => analyzer_seq_pool1_tester_helper.h} | 79 ++----------------- .../analyzer_transformer_compare_tester.cc | 44 +++++++++++ .../api/analyzer_transformer_fuse_tester.cc | 36 +++++++++ .../analyzer_transformer_profile_tester.cc | 45 +++++++++++ ...c => analyzer_transformer_tester_helper.h} | 61 ++------------ .../tests/unittests/test_activation_op.py | 53 +++++++++++++ 12 files changed, 427 insertions(+), 128 deletions(-) create mode 100644 paddle/fluid/inference/tests/api/analyzer_seq_pool1_compare_determine_tester.cc create mode 100644 paddle/fluid/inference/tests/api/analyzer_seq_pool1_compare_tester.cc create mode 100644 paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_compare_zero_copy_tester.cc create mode 100644 paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc create mode 100644 paddle/fluid/inference/tests/api/analyzer_seq_pool1_profile_tester.cc rename paddle/fluid/inference/tests/api/{analyzer_seq_pool1_tester.cc => analyzer_seq_pool1_tester_helper.h} (70%) create mode 100644 paddle/fluid/inference/tests/api/analyzer_transformer_compare_tester.cc create mode 100644 paddle/fluid/inference/tests/api/analyzer_transformer_fuse_tester.cc create mode 100644 paddle/fluid/inference/tests/api/analyzer_transformer_profile_tester.cc rename paddle/fluid/inference/tests/api/{analyzer_transformer_tester.cc => analyzer_transformer_tester_helper.h} (82%) diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt 
b/paddle/fluid/inference/tests/api/CMakeLists.txt index 28211d0ce08..5d6970fc4e3 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -132,9 +132,17 @@ if(NOT APPLE AND WITH_MKLML) # seq_pool1 set(SEQ_POOL1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/seq_pool") download_model_and_data(${SEQ_POOL1_INSTALL_DIR} "seq_pool1_model_.tar.gz" "seq_pool1_data.txt.tar.gz") - inference_analysis_api_test(test_analyzer_seq_pool1 ${SEQ_POOL1_INSTALL_DIR} analyzer_seq_pool1_tester.cc) + inference_analysis_api_test(test_analyzer_seq_pool1_compare_determine ${SEQ_POOL1_INSTALL_DIR} analyzer_seq_pool1_compare_determine_tester.cc) + inference_analysis_api_test(test_analyzer_seq_pool1 ${SEQ_POOL1_INSTALL_DIR} analyzer_seq_pool1_compare_tester.cc) + inference_analysis_api_test(test_analyzer_seq_pool1_fuse_compare_zero_copy ${SEQ_POOL1_INSTALL_DIR} analyzer_seq_pool1_fuse_compare_zero_copy_tester.cc) + inference_analysis_api_test(test_analyzer_seq_pool1_fuse_statis ${SEQ_POOL1_INSTALL_DIR} analyzer_seq_pool1_fuse_statis_tester.cc) + inference_analysis_api_test(test_analyzer_seq_pool1_profile ${SEQ_POOL1_INSTALL_DIR} analyzer_seq_pool1_profile_tester.cc) if(NOT WIN32) - set_tests_properties(test_analyzer_seq_pool1 PROPERTIES TIMEOUT 150) + set_tests_properties(test_analyzer_seq_pool1_compare_determine PROPERTIES TIMEOUT 120) + set_tests_properties(test_analyzer_seq_pool1 PROPERTIES TIMEOUT 120) + set_tests_properties(test_analyzer_seq_pool1_fuse_compare_zero_copy PROPERTIES TIMEOUT 120) + set_tests_properties(test_analyzer_seq_pool1_fuse_statis PROPERTIES TIMEOUT 120) + set_tests_properties(test_analyzer_seq_pool1_profile PROPERTIES TIMEOUT 120) endif() else() # TODO: fix this test on MACOS and OPENBLAS, the reason is that @@ -215,7 +223,15 @@ inference_analysis_api_test(test_analyzer_seq_conv1 ${SEQ_CONV1_INSTALL_DIR} ana # transformer, the dataset only works on batch_size=8 now set(TRANSFORMER_INSTALL_DIR 
"${INFERENCE_DEMO_INSTALL_DIR}/transformer") download_model_and_data(${TRANSFORMER_INSTALL_DIR} "temp/transformer_model.tar.gz" "temp/transformer_data.txt.tar.gz") -inference_analysis_test(test_analyzer_transformer SRCS analyzer_transformer_tester.cc +inference_analysis_test(test_analyzer_transformer SRCS analyzer_transformer_compare_tester.cc + EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} + ARGS --infer_model=${TRANSFORMER_INSTALL_DIR}/model --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt --batch_size=8 + --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}) +inference_analysis_test(test_analyzer_transformer_fuse SRCS analyzer_transformer_fuse_tester.cc + EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} + ARGS --infer_model=${TRANSFORMER_INSTALL_DIR}/model --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt --batch_size=8 + --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}) +inference_analysis_test(test_analyzer_transformer_profile SRCS analyzer_transformer_profile_tester.cc EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} ARGS --infer_model=${TRANSFORMER_INSTALL_DIR}/model --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt --batch_size=8 --cpu_num_threads=${CPU_NUM_THREADS_ON_CI}) diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_compare_determine_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_compare_determine_tester.cc new file mode 100644 index 00000000000..8f0778b83e5 --- /dev/null +++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_compare_determine_tester.cc @@ -0,0 +1,40 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include +#include "paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester_helper.h" +#include "paddle/fluid/inference/tests/api/tester_helper.h" + +namespace paddle { +namespace inference { +namespace analysis { +namespace seq_pool1_tester { + +// Compare Deterministic result +TEST(Analyzer_seq_pool1_compare_determine, compare_determine) { + AnalysisConfig cfg; + SetConfig(&cfg); + + std::vector> input_slots_all; + SetInput(&input_slots_all); + CompareDeterministic(reinterpret_cast(&cfg), + input_slots_all); +} + +} // namespace seq_pool1_tester +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_compare_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_compare_tester.cc new file mode 100644 index 00000000000..099ff1f31a7 --- /dev/null +++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_compare_tester.cc @@ -0,0 +1,39 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include +#include +#include "paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester_helper.h" +#include "paddle/fluid/inference/tests/api/tester_helper.h" + +namespace paddle { +namespace inference { +namespace analysis { +namespace seq_pool1_tester { + +TEST(Analyzer_seq_pool1_compare, compare) { + AnalysisConfig cfg; + SetConfig(&cfg); + + std::vector> input_slots_all; + SetInput(&input_slots_all); + CompareNativeAndAnalysis( + reinterpret_cast(&cfg), input_slots_all); +} + +} // namespace seq_pool1_tester +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_compare_zero_copy_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_compare_zero_copy_tester.cc new file mode 100644 index 00000000000..1fbcbf1a3f4 --- /dev/null +++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_compare_zero_copy_tester.cc @@ -0,0 +1,46 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include +#include +#include "paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester_helper.h" +#include "paddle/fluid/inference/tests/api/tester_helper.h" + +namespace paddle { +namespace inference { +namespace analysis { +namespace seq_pool1_tester { + +// Compare result of AnalysisConfig and AnalysisConfig + ZeroCopy +TEST(Analyzer_seq_pool1_compare_zero_copy, compare_zero_copy) { + AnalysisConfig cfg; + SetConfig(&cfg); + + AnalysisConfig cfg1; + SetConfig(&cfg1); + + std::vector> input_slots_all; + SetInput(&input_slots_all); + std::vector outputs_name; + outputs_name.emplace_back(out_var_name); + CompareAnalysisAndZeroCopy(reinterpret_cast(&cfg), + reinterpret_cast(&cfg1), + input_slots_all, outputs_name); +} + +} // namespace seq_pool1_tester +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc new file mode 100644 index 00000000000..b8ccb8cee50 --- /dev/null +++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc @@ -0,0 +1,48 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include +#include +#include "paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester_helper.h" +#include "paddle/fluid/inference/tests/api/tester_helper.h" + +namespace paddle { +namespace inference { +namespace analysis { +namespace seq_pool1_tester { + +// Check the fuse status +TEST(Analyzer_seq_pool1_fuse_statis, fuse_statis) { + AnalysisConfig cfg; + SetConfig(&cfg); + int num_ops; + auto predictor = CreatePaddlePredictor(cfg); + auto fuse_statis = GetFuseStatis(predictor.get(), &num_ops); + ASSERT_TRUE(fuse_statis.count("fc_fuse")); + ASSERT_TRUE(fuse_statis.count("seqpool_concat_fuse")); + ASSERT_TRUE(fuse_statis.count("squared_mat_sub_fuse")); + ASSERT_TRUE(fuse_statis.count("repeated_fc_relu_fuse")); + ASSERT_EQ(fuse_statis.at("fc_fuse"), 10); + EXPECT_EQ(fuse_statis.at("seqpool_concat_fuse"), 2); + EXPECT_EQ(fuse_statis.at("squared_mat_sub_fuse"), 2); + EXPECT_EQ(fuse_statis.at("repeated_fc_relu_fuse"), 2); + LOG(INFO) << "num_ops: " << num_ops; + EXPECT_EQ(num_ops, 171); +} + +} // namespace seq_pool1_tester +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_profile_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_profile_tester.cc new file mode 100644 index 00000000000..0ccd95f2a17 --- /dev/null +++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_profile_tester.cc @@ -0,0 +1,42 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include +#include "paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester_helper.h" +#include "paddle/fluid/inference/tests/api/tester_helper.h" + +namespace paddle { +namespace inference { +namespace analysis { +namespace seq_pool1_tester { + +void profile(bool use_mkldnn = false) { + AnalysisConfig cfg; + SetConfig(&cfg, use_mkldnn); + + std::vector> outputs; + std::vector> input_slots_all; + SetInput(&input_slots_all); + TestPrediction(reinterpret_cast(&cfg), + input_slots_all, &outputs, FLAGS_num_threads); +} + +TEST(Analyzer_seq_pool1_profile, profile) { profile(); } + +} // namespace seq_pool1_tester +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester_helper.h similarity index 70% rename from paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc rename to paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester_helper.h index 9f1556cdb87..0dac11bc345 100644 --- a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester_helper.h @@ -11,15 +11,20 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ - +#pragma once #include #include #include +#include +#include +#include +#include #include "paddle/fluid/inference/tests/api/tester_helper.h" namespace paddle { namespace inference { namespace analysis { +namespace seq_pool1_tester { // diff: similarity_norm.tmp_0, for speed: fc_4.tmp_1 static const char out_var_name[] = "reduce_sum_0.tmp_0"; @@ -164,77 +169,7 @@ void SetConfig(AnalysisConfig *cfg, bool use_mkldnn = false) { cfg->pass_builder()->InsertPass(2, "seqpool_concat_fuse_pass"); } -void profile(bool use_mkldnn = false) { - AnalysisConfig cfg; - SetConfig(&cfg, use_mkldnn); - - std::vector> outputs; - std::vector> input_slots_all; - SetInput(&input_slots_all); - TestPrediction(reinterpret_cast(&cfg), - input_slots_all, &outputs, FLAGS_num_threads); -} - -TEST(Analyzer_seq_pool1, profile) { profile(); } - -// Compare result of NativeConfig and AnalysisConfig -TEST(Analyzer_seq_pool1, compare) { - AnalysisConfig cfg; - SetConfig(&cfg); - - std::vector> input_slots_all; - SetInput(&input_slots_all); - CompareNativeAndAnalysis( - reinterpret_cast(&cfg), input_slots_all); -} - -// Compare Deterministic result -TEST(Analyzer_seq_pool1, compare_determine) { - AnalysisConfig cfg; - SetConfig(&cfg); - - std::vector> input_slots_all; - SetInput(&input_slots_all); - CompareDeterministic(reinterpret_cast(&cfg), - input_slots_all); -} - -// Check the fuse status -TEST(Analyzer_seq_pool1, fuse_statis) { - AnalysisConfig cfg; - SetConfig(&cfg); - int num_ops; - auto predictor = CreatePaddlePredictor(cfg); - auto fuse_statis = GetFuseStatis(predictor.get(), &num_ops); - ASSERT_TRUE(fuse_statis.count("fc_fuse")); - ASSERT_TRUE(fuse_statis.count("seqpool_concat_fuse")); - ASSERT_TRUE(fuse_statis.count("squared_mat_sub_fuse")); - ASSERT_TRUE(fuse_statis.count("repeated_fc_relu_fuse")); - ASSERT_EQ(fuse_statis.at("fc_fuse"), 10); - EXPECT_EQ(fuse_statis.at("seqpool_concat_fuse"), 2); - EXPECT_EQ(fuse_statis.at("squared_mat_sub_fuse"), 2); - 
EXPECT_EQ(fuse_statis.at("repeated_fc_relu_fuse"), 2); - LOG(INFO) << "num_ops: " << num_ops; - EXPECT_EQ(num_ops, 171); -} - -// Compare result of AnalysisConfig and AnalysisConfig + ZeroCopy -TEST(Analyzer_seq_pool1, compare_zero_copy) { - AnalysisConfig cfg; - SetConfig(&cfg); - - AnalysisConfig cfg1; - SetConfig(&cfg1); - - std::vector> input_slots_all; - SetInput(&input_slots_all); - std::vector outputs_name; - outputs_name.emplace_back(out_var_name); - CompareAnalysisAndZeroCopy(reinterpret_cast(&cfg), - reinterpret_cast(&cfg1), - input_slots_all, outputs_name); -} - +} // namespace seq_pool1_tester } // namespace analysis } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/tests/api/analyzer_transformer_compare_tester.cc b/paddle/fluid/inference/tests/api/analyzer_transformer_compare_tester.cc new file mode 100644 index 00000000000..f26ec57103b --- /dev/null +++ b/paddle/fluid/inference/tests/api/analyzer_transformer_compare_tester.cc @@ -0,0 +1,44 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/inference/tests/api/analyzer_transformer_tester_helper.h" + +namespace paddle { +namespace inference { +namespace analysis { +namespace transformer_tester { + +void compare(bool use_mkldnn = false) { + AnalysisConfig cfg; + SetConfig(&cfg); + if (use_mkldnn) { + cfg.EnableMKLDNN(); + cfg.pass_builder()->AppendPass("fc_mkldnn_pass"); + } + + std::vector> input_slots_all; + SetInput(&input_slots_all); + CompareNativeAndAnalysis( + reinterpret_cast(&cfg), input_slots_all); +} + +TEST(Analyzer_Transformer, compare) { compare(); } +#ifdef PADDLE_WITH_MKLDNN +TEST(Analyzer_Transformer, compare_mkldnn) { compare(true /* use_mkldnn */); } +#endif + +} // namespace transformer_tester +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/tests/api/analyzer_transformer_fuse_tester.cc b/paddle/fluid/inference/tests/api/analyzer_transformer_fuse_tester.cc new file mode 100644 index 00000000000..4e5484c9ea0 --- /dev/null +++ b/paddle/fluid/inference/tests/api/analyzer_transformer_fuse_tester.cc @@ -0,0 +1,36 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/inference/tests/api/analyzer_transformer_tester_helper.h" + +namespace paddle { +namespace inference { +namespace analysis { +namespace transformer_tester { + +// Check the fuse status +TEST(Analyzer_Transformer, fuse_statis) { + AnalysisConfig cfg; + SetConfig(&cfg); + + int num_ops; + auto predictor = CreatePaddlePredictor(cfg); + auto fuse_statis = GetFuseStatis( + static_cast(predictor.get()), &num_ops); +} + +} // namespace transformer_tester +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/tests/api/analyzer_transformer_profile_tester.cc b/paddle/fluid/inference/tests/api/analyzer_transformer_profile_tester.cc new file mode 100644 index 00000000000..caeba327716 --- /dev/null +++ b/paddle/fluid/inference/tests/api/analyzer_transformer_profile_tester.cc @@ -0,0 +1,45 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/inference/tests/api/analyzer_transformer_tester_helper.h" + +namespace paddle { +namespace inference { +namespace analysis { +namespace transformer_tester { + +void profile(bool use_mkldnn = false) { + AnalysisConfig cfg; + SetConfig(&cfg); + std::vector> outputs; + if (use_mkldnn) { + cfg.EnableMKLDNN(); + cfg.pass_builder()->AppendPass("fc_mkldnn_pass"); + } + + std::vector> input_slots_all; + SetInput(&input_slots_all); + TestPrediction(reinterpret_cast(&cfg), + input_slots_all, &outputs, FLAGS_num_threads); +} + +TEST(Analyzer_Transformer, profile) { profile(); } +#ifdef PADDLE_WITH_MKLDNN +TEST(Analyzer_Transformer, profile_mkldnn) { profile(true); } +#endif + +} // namespace transformer_tester +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/tests/api/analyzer_transformer_tester.cc b/paddle/fluid/inference/tests/api/analyzer_transformer_tester_helper.h similarity index 82% rename from paddle/fluid/inference/tests/api/analyzer_transformer_tester.cc rename to paddle/fluid/inference/tests/api/analyzer_transformer_tester_helper.h index 9726109bf89..e43456ed832 100644 --- a/paddle/fluid/inference/tests/api/analyzer_transformer_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_transformer_tester_helper.h @@ -11,11 +11,16 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. - +#pragma once +#include +#include +#include #include "paddle/fluid/inference/tests/api/tester_helper.h" namespace paddle { namespace inference { +namespace analysis { +namespace transformer_tester { struct DataRecord { std::vector> src_word, src_pos, trg_word, init_idx; @@ -182,57 +187,7 @@ void SetInput(std::vector> *inputs) { } } -// Easy for profiling independently. 
-void profile(bool use_mkldnn = false) { - AnalysisConfig cfg; - SetConfig(&cfg); - std::vector> outputs; - if (use_mkldnn) { - cfg.EnableMKLDNN(); - cfg.pass_builder()->AppendPass("fc_mkldnn_pass"); - } - - std::vector> input_slots_all; - SetInput(&input_slots_all); - TestPrediction(reinterpret_cast(&cfg), - input_slots_all, &outputs, FLAGS_num_threads); -} - -TEST(Analyzer_Transformer, profile) { profile(); } -#ifdef PADDLE_WITH_MKLDNN -TEST(Analyzer_Transformer, profile_mkldnn) { profile(true); } -#endif - -// Check the fuse status -TEST(Analyzer_Transformer, fuse_statis) { - AnalysisConfig cfg; - SetConfig(&cfg); - - int num_ops; - auto predictor = CreatePaddlePredictor(cfg); - auto fuse_statis = GetFuseStatis( - static_cast(predictor.get()), &num_ops); -} - -// Compare result of NativeConfig and AnalysisConfig -void compare(bool use_mkldnn = false) { - AnalysisConfig cfg; - SetConfig(&cfg); - if (use_mkldnn) { - cfg.EnableMKLDNN(); - cfg.pass_builder()->AppendPass("fc_mkldnn_pass"); - } - - std::vector> input_slots_all; - SetInput(&input_slots_all); - CompareNativeAndAnalysis( - reinterpret_cast(&cfg), input_slots_all); -} - -TEST(Analyzer_Transformer, compare) { compare(); } -#ifdef PADDLE_WITH_MKLDNN -TEST(Analyzer_Transformer, compare_mkldnn) { compare(true /* use_mkldnn */); } -#endif - +} // namespace transformer_tester +} // namespace analysis } // namespace inference } // namespace paddle diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py index f6ba03194aa..791f1ee2dfa 100755 --- a/python/paddle/fluid/tests/unittests/test_activation_op.py +++ b/python/paddle/fluid/tests/unittests/test_activation_op.py @@ -28,6 +28,7 @@ from paddle.fluid import compiler, Program, program_guard class TestSqrtOpError(unittest.TestCase): def test_errors(self): + paddle.enable_static() with program_guard(Program(), Program()): # The input type of sqrt op must be Variable or numpy.ndarray. 
in1 = 1 @@ -44,6 +45,7 @@ class TestSqrtOpError(unittest.TestCase): class TestActivation(OpTest): def setUp(self): + paddle.enable_static() self.op_type = "exp" self.init_dtype() self.init_kernel_type() @@ -71,6 +73,7 @@ class TestActivation(OpTest): class TestParameter(object): def test_out_name(self): + paddle.enable_static() with fluid.program_guard(fluid.Program()): np_x = np.array([0.1]) data = fluid.layers.data(name="X", shape=[1]) @@ -92,6 +95,7 @@ class TestParameter(object): class TestSigmoid(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "sigmoid" self.init_dtype() @@ -112,6 +116,7 @@ class TestSigmoid(TestActivation): class TestLogSigmoid(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "logsigmoid" self.init_dtype() @@ -180,6 +185,7 @@ class TestLogSigmoidAPI(unittest.TestCase): class TestTanh(TestActivation, TestParameter): def setUp(self): + paddle.enable_static() self.op_type = "tanh" self.init_dtype() x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) @@ -255,6 +261,7 @@ class TestTanhAPI(unittest.TestCase): class TestAtan(TestActivation, TestParameter): def setUp(self): + paddle.enable_static() self.op_type = "atan" self.init_dtype() @@ -291,6 +298,7 @@ class TestAtan(TestActivation, TestParameter): class TestSinh(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "sinh" self.init_dtype() @@ -349,6 +357,7 @@ class TestSinh(TestActivation): class TestSinhOpError(unittest.TestCase): def test_errors(self): + paddle.enable_static() with program_guard(Program()): # The input type must be Variable. 
self.assertRaises(TypeError, fluid.layers.sinh, 1) @@ -362,6 +371,7 @@ class TestSinhOpError(unittest.TestCase): class TestCosh(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "cosh" self.init_dtype() @@ -420,6 +430,7 @@ class TestCosh(TestActivation): class TestCoshOpError(unittest.TestCase): def test_errors(self): + paddle.enable_static() with program_guard(Program()): # The input type must be Variable. self.assertRaises(TypeError, fluid.layers.cosh, 1) @@ -438,6 +449,7 @@ def ref_tanhshrink(x): class TestTanhshrink(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "tanh_shrink" self.init_dtype() @@ -512,6 +524,7 @@ def ref_hardshrink(x, threshold): class TestHardShrink(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "hard_shrink" self.init_dtype() @@ -541,6 +554,7 @@ class TestHardShrink_threshold_negative(TestHardShrink): class TestHardShrinkAPI(unittest.TestCase): # test paddle.nn.Hardshrink, paddle.nn.functional.hardshrink def setUp(self): + paddle.enable_static() self.x_np = np.random.uniform(-1, 1, [10, 12]).astype('float32') self.place=paddle.CUDAPlace(0) if core.is_compiled_with_cuda() \ else paddle.CPUPlace() @@ -662,6 +676,7 @@ def ref_softshrink(x, threshold=0.5): class TestSoftshrink(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "softshrink" self.init_dtype() @@ -736,6 +751,7 @@ class TestSoftshrinkAPI(unittest.TestCase): class TestSqrt(TestActivation, TestParameter): def setUp(self): + paddle.enable_static() self.op_type = "sqrt" self.init_dtype() @@ -753,6 +769,7 @@ class TestSqrt(TestActivation, TestParameter): class TestRsqrt(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "rsqrt" self.init_dtype() @@ -770,6 +787,7 @@ class TestRsqrt(TestActivation): class TestAbs(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "abs" self.init_dtype() @@ -792,6 +810,7 @@ class TestAbs(TestActivation): class 
TestCeil(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "ceil" self.init_dtype() @@ -808,6 +827,7 @@ class TestCeil(TestActivation): class TestFloor(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "floor" self.init_dtype() @@ -826,6 +846,7 @@ class TestFloor(TestActivation): class TestCos(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "cos" self.init_dtype() @@ -843,6 +864,7 @@ class TestCos(TestActivation): class TestAcos(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "acos" self.init_dtype() @@ -860,6 +882,7 @@ class TestAcos(TestActivation): class TestSin(TestActivation, TestParameter): def setUp(self): + paddle.enable_static() self.op_type = "sin" self.init_dtype() @@ -877,6 +900,7 @@ class TestSin(TestActivation, TestParameter): class TestAsin(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "asin" self.init_dtype() @@ -894,6 +918,7 @@ class TestAsin(TestActivation): class TestRound(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "round" self.init_dtype() @@ -909,6 +934,7 @@ class TestRound(TestActivation): class TestRelu(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "relu" self.init_dtype() @@ -979,6 +1005,7 @@ class TestLeakyRelu(TestActivation): return 0.02 def setUp(self): + paddle.enable_static() self.op_type = "leaky_relu" self.init_dtype() alpha = self.get_alpha() @@ -1084,6 +1111,7 @@ def gelu(x, approximate): class TestGeluApproximate(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "gelu" self.init_dtype() approximate = True @@ -1102,6 +1130,7 @@ class TestGeluApproximate(TestActivation): class TestGelu(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "gelu" self.init_dtype() approximate = False @@ -1169,6 +1198,7 @@ class TestGELUAPI(unittest.TestCase): class TestBRelu(TestActivation): def setUp(self): + 
paddle.enable_static() self.op_type = "brelu" self.init_dtype() @@ -1194,6 +1224,7 @@ class TestBRelu(TestActivation): class TestBReluOpError(unittest.TestCase): def test_errors(self): + paddle.enable_static() with program_guard(Program()): # The input type must be Variable. self.assertRaises(TypeError, fluid.layers.brelu, 1) @@ -1215,6 +1246,7 @@ def ref_relu6(x, threshold=6.0): class TestRelu6(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "relu6" self.init_dtype() @@ -1286,6 +1318,7 @@ class TestRelu6API(unittest.TestCase): class TestHardSwish(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = 'hard_swish' self.init_dtype() @@ -1310,6 +1343,7 @@ class TestHardSwish(TestActivation): class TestHardSwishOpError(unittest.TestCase): def test_errors(self): + paddle.enable_static() with program_guard(Program()): # The input type must be Variable. self.assertRaises(TypeError, fluid.layers.hard_swish, 1) @@ -1323,6 +1357,7 @@ class TestHardSwishOpError(unittest.TestCase): class TestSoftRelu(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "soft_relu" self.init_dtype() @@ -1348,6 +1383,7 @@ class TestSoftRelu(TestActivation): class TestSoftReluOpError(unittest.TestCase): def test_errors(self): + paddle.enable_static() with program_guard(Program()): # The input type must be Variable. 
self.assertRaises(TypeError, fluid.layers.soft_relu, 1) @@ -1366,6 +1402,7 @@ def elu(x, alpha): class TestELU(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "elu" self.init_dtype() @@ -1435,6 +1472,7 @@ class TestELUAPI(unittest.TestCase): class TestReciprocal(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "reciprocal" self.init_dtype() @@ -1452,6 +1490,7 @@ class TestReciprocal(TestActivation): class TestLog(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "log" self.init_dtype() @@ -1478,6 +1517,7 @@ class TestLog(TestActivation): class TestLog1p(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "log1p" self.init_dtype() @@ -1522,6 +1562,7 @@ class TestLog1p(TestActivation): class TestSquare(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "square" self.init_dtype() @@ -1539,6 +1580,7 @@ class TestSquare(TestActivation): class TestPow(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "pow" self.init_dtype() @@ -1557,6 +1599,7 @@ class TestPow(TestActivation): class TestPow_factor_tensor(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "pow" self.init_dtype() @@ -1633,6 +1676,7 @@ class TestPow_factor_tensor(TestActivation): class TestSTanh(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "stanh" self.init_dtype() @@ -1653,6 +1697,7 @@ class TestSTanh(TestActivation): class TestSTanhOpError(unittest.TestCase): def test_errors(self): + paddle.enable_static() with program_guard(Program()): # The input type must be Variable. 
self.assertRaises(TypeError, fluid.layers.stanh, 1) @@ -1673,6 +1718,7 @@ def ref_softplus(x, beta=1, threshold=20): class TestSoftplus(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "softplus" self.init_dtype() @@ -1751,6 +1797,7 @@ def ref_softsign(x): class TestSoftsign(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "softsign" self.init_dtype() @@ -1818,6 +1865,7 @@ class TestSoftsignAPI(unittest.TestCase): class TestThresholdedRelu(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "thresholded_relu" self.init_dtype() @@ -1841,6 +1889,7 @@ class TestThresholdedRelu(TestActivation): class TestThresholdedReluOpError(unittest.TestCase): def test_errors(self): + paddle.enable_static() with program_guard(Program()): # The input type must be Variable. self.assertRaises(TypeError, fluid.layers.thresholded_relu, 1) @@ -1854,6 +1903,7 @@ class TestThresholdedReluOpError(unittest.TestCase): class TestHardSigmoid(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "hard_sigmoid" self.init_dtype() @@ -1883,6 +1933,7 @@ class TestHardSigmoid(TestActivation): class TestHardSigmoidOpError(unittest.TestCase): def test_errors(self): + paddle.enable_static() with program_guard(Program()): # The input type must be Variable. self.assertRaises(TypeError, fluid.layers.hard_sigmoid, 1) @@ -1896,6 +1947,7 @@ class TestHardSigmoidOpError(unittest.TestCase): class TestSwish(TestActivation): def setUp(self): + paddle.enable_static() self.op_type = "swish" self.init_dtype() @@ -1915,6 +1967,7 @@ class TestSwish(TestActivation): class TestSwishOpError(unittest.TestCase): def test_errors(self): + paddle.enable_static() with program_guard(Program()): # The input type must be Variable. 
self.assertRaises(TypeError, fluid.layers.swish, 1) -- GitLab From 6bb02e8e3c14cd3ba7bdd80ee44fd93c8a9ade6b Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Fri, 25 Sep 2020 13:41:54 +0800 Subject: [PATCH 093/117] increase retry time (#27553) --- paddle/fluid/memory/allocation/retry_allocator_test.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/memory/allocation/retry_allocator_test.cc b/paddle/fluid/memory/allocation/retry_allocator_test.cc index 0e81f5f2238..5d3e133f97d 100644 --- a/paddle/fluid/memory/allocation/retry_allocator_test.cc +++ b/paddle/fluid/memory/allocation/retry_allocator_test.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/memory/allocation/retry_allocator.h" + #include #include // NOLINT #include // NOLINT @@ -20,6 +21,7 @@ #include #include // NOLINT #include + #include "gtest/gtest.h" #include "paddle/fluid/memory/allocation/best_fit_allocator.h" #include "paddle/fluid/memory/allocation/cpu_allocator.h" @@ -45,7 +47,7 @@ TEST(RetryAllocator, RetryAllocator) { size_t thread_num = 4; size_t sleep_time = 40; - size_t extra_time = 10; + size_t extra_time = 20; // Reserve to perform more tests in the future std::vector> allocators; -- GitLab From effd51b6bebdd79df798ab66eeba5465886f560e Mon Sep 17 00:00:00 2001 From: liym27 <33742067+liym27@users.noreply.github.com> Date: Fri, 25 Sep 2020 14:06:03 +0800 Subject: [PATCH 094/117] Fix error message in operator/utils.h (#27532) --- paddle/fluid/operators/utils.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/operators/utils.h b/paddle/fluid/operators/utils.h index aec995304a7..05d077b173a 100644 --- a/paddle/fluid/operators/utils.h +++ b/paddle/fluid/operators/utils.h @@ -41,7 +41,9 @@ inline std::vector GetDataFromTensor(const framework::Tensor* x) { // NOTE: Converting int64 to int32 may cause data overflow. 
vec_new_data = std::vector(data, data + x->numel()); } else { - PADDLE_THROW("The dtype of Tensor must be int32 or int64."); + PADDLE_THROW(platform::errors::InvalidArgument( + "The dtype of Tensor must be int32 or int64, but received: %s", + x->type())); } return vec_new_data; } @@ -53,10 +55,11 @@ inline std::vector GetDataFromTensorList( for (size_t i = 0; i < list_tensor.size(); ++i) { auto tensor = list_tensor[i]; PADDLE_ENFORCE_EQ(tensor->dims(), framework::make_ddim({1}), - "ShapeError: The shape of Tensor in list must be [1]. " - "But received the shape " - "is [%s]", - tensor->dims()); + platform::errors::InvalidArgument( + "The shape of Tensor in list must be [1]. " + "But received its shape " + "is [%s]", + tensor->dims())); if (tensor->type() == framework::proto::VarType::INT32) { if (platform::is_gpu_place(tensor->place())) { @@ -76,7 +79,10 @@ inline std::vector GetDataFromTensorList( vec_new_data.push_back(static_cast(*tensor->data())); } } else { - PADDLE_THROW("The dtype of Tensor in list must be int32 or int64."); + PADDLE_THROW(platform::errors::InvalidArgument( + "The dtype of Tensor in list must be int32 or int64, but received: " + "%s", + tensor->type())); } } return vec_new_data; -- GitLab From 36ed83d27005c22a8af19211638c343c4dc5b759 Mon Sep 17 00:00:00 2001 From: GaoWei8 <53294385+GaoWei8@users.noreply.github.com> Date: Fri, 25 Sep 2020 14:09:10 +0800 Subject: [PATCH 095/117] Refine PADDLE_ENFORCE (#27360) * refine PADDLE_ENFORCE --- paddle/fluid/operators/benchmark/op_tester.cc | 32 ++++++---- .../operators/benchmark/op_tester_config.cc | 20 ++++-- paddle/fluid/operators/jit/benchmark.cc | 12 +++- paddle/fluid/operators/jit/gen/embseqpool.cc | 30 +++++++-- paddle/fluid/operators/jit/gen/matmul.cc | 24 ++++++-- paddle/fluid/operators/jit/gen/matmul.h | 5 +- paddle/fluid/operators/jit/gen/seqpool.cc | 10 ++- paddle/fluid/operators/jit/gen/seqpool.h | 9 ++- paddle/fluid/operators/jit/gen/sgd.cc | 21 ++++++- 
paddle/fluid/operators/jit/gen/vbroadcast.cc | 6 +- paddle/fluid/operators/jit/gen_base.cc | 11 +++- paddle/fluid/operators/jit/helper.cc | 23 +++++-- paddle/fluid/operators/jit/helper.h | 25 +++++--- paddle/fluid/operators/jit/more/mix/mix.cc | 3 +- paddle/fluid/operators/jit/more/mkl/mkl.h | 61 ++++++++++++++++--- paddle/fluid/operators/jit/refer/refer.h | 51 +++++++++++++--- paddle/fluid/operators/jit/test.cc | 11 +++- 17 files changed, 278 insertions(+), 76 deletions(-) diff --git a/paddle/fluid/operators/benchmark/op_tester.cc b/paddle/fluid/operators/benchmark/op_tester.cc index 5ec34e57450..654df5ccd5e 100644 --- a/paddle/fluid/operators/benchmark/op_tester.cc +++ b/paddle/fluid/operators/benchmark/op_tester.cc @@ -47,8 +47,8 @@ void OpTester::Init(const OpTesterConfig &config) { CreateInputVarDesc(); CreateOutputVarDesc(); } else { - PADDLE_THROW(platform::errors::NotFound("Operator '%s' is not registered.", - config_.op_type)); + PADDLE_THROW(platform::errors::NotFound( + "Operator '%s' is not registered in OpTester.", config_.op_type)); } if (config_.device_id >= 0) { @@ -81,7 +81,8 @@ void OpTester::Run() { platform::EnableProfiler(platform::ProfilerState::kAll); platform::SetDeviceId(config_.device_id); #else - PADDLE_THROW("'CUDAPlace' is not supported in CPU only device."); + PADDLE_THROW(platform::errors::PermissionDenied( + "'CUDAPlace' is not supported in CPU only device.")); #endif } @@ -162,7 +163,8 @@ framework::proto::VarType::Type OpTester::TransToVarType(std::string str) { } else if (str == "fp64") { return framework::proto::VarType::FP64; } else { - PADDLE_THROW("Unsupported dtype %s.", str.c_str()); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported dtype %s in OpTester.", str.c_str())); } } @@ -233,8 +235,8 @@ void OpTester::CreateOpDesc() { case framework::proto::AttrType::INTS: case framework::proto::AttrType::FLOATS: case framework::proto::AttrType::STRINGS: - PADDLE_THROW( - platform::errors::Unimplemented("Not supported 
STRINGS type yet.")); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported STRINGS type in OpTester yet.")); break; case framework::proto::AttrType::LONG: { int64_t value = StringTo(value_str); @@ -242,7 +244,8 @@ void OpTester::CreateOpDesc() { } break; case framework::proto::AttrType::LONGS: default: - PADDLE_THROW("Unsupport attr type %d", type); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupport attr type %d in OpTester.", type)); } } } @@ -299,7 +302,8 @@ void OpTester::SetupTensor(framework::LoDTensor *tensor, } is.close(); } else { - PADDLE_THROW("Unsupported initializer %s.", initializer.c_str()); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported initializer %s in OpTester.", initializer.c_str())); } if (!platform::is_cpu_place(place_)) { @@ -351,7 +355,8 @@ void OpTester::CreateVariables(framework::Scope *scope) { static_cast(1.0), item.second.initializer, item.second.filename); } else { - PADDLE_THROW("Unsupported dtype %d.", data_type); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported dtype %d in OpTester.", data_type)); } VLOG(3) << "Set lod for tensor " << var_name; @@ -473,7 +478,8 @@ std::string OpTester::DebugString() { << "\n"; } break; default: - PADDLE_THROW("Unsupport attr type %d", attr_type); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupport attr type %d in OpTester.", attr_type)); } ss << GenSpaces(--count) << "}\n"; } @@ -484,8 +490,10 @@ std::string OpTester::DebugString() { TEST(op_tester, base) { if (!FLAGS_op_config_list.empty()) { std::ifstream fin(FLAGS_op_config_list, std::ios::in | std::ios::binary); - PADDLE_ENFORCE(static_cast(fin), "Cannot open file %s", - FLAGS_op_config_list.c_str()); + PADDLE_ENFORCE_EQ( + static_cast(fin), true, + platform::errors::InvalidArgument("OpTester cannot open file %s", + FLAGS_op_config_list.c_str())); std::vector op_configs; while (!fin.eof()) { VLOG(4) << "Reading config " << op_configs.size() << "..."; diff --git 
a/paddle/fluid/operators/benchmark/op_tester_config.cc b/paddle/fluid/operators/benchmark/op_tester_config.cc index 818e5f64edc..e9477798858 100644 --- a/paddle/fluid/operators/benchmark/op_tester_config.cc +++ b/paddle/fluid/operators/benchmark/op_tester_config.cc @@ -78,7 +78,8 @@ void OpInputConfig::ParseDType(std::istream& is) { } else if (dtype_str == "fp64" || dtype_str == "double") { dtype = "fp64"; } else { - PADDLE_THROW("Unsupported dtype %s", dtype_str.c_str()); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported dtype %s in OpInputConfig.", dtype_str.c_str())); } VLOG(4) << "dtype of input " << name << " is: " << dtype; } @@ -91,7 +92,9 @@ void OpInputConfig::ParseInitializer(std::istream& is) { const std::vector supported_initializers = {"random", "natural", "zeros", "file"}; if (!Has(supported_initializers, initializer_str)) { - PADDLE_THROW("Unsupported initializer %s", initializer_str.c_str()); + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported initializer %s in OpInputConfig.", + initializer_str.c_str())); } initializer = initializer_str; @@ -126,7 +129,12 @@ void OpInputConfig::ParseLoD(std::istream& is) { } } EraseEndSep(&lod_str); - PADDLE_ENFORCE_GE(lod_str.length(), 4U); + PADDLE_ENFORCE_GE( + lod_str.length(), 4U, + platform::errors::InvalidArgument( + "The length of lod string should be " + "equal to or larger than 4. 
But length of lod string is %zu.", + lod_str.length())); VLOG(4) << "lod: " << lod_str << ", length: " << lod_str.length(); // Parse the lod_str @@ -153,8 +161,10 @@ void OpInputConfig::ParseLoD(std::istream& is) { OpTesterConfig::OpTesterConfig(const std::string& filename) { std::ifstream fin(filename, std::ios::in | std::ios::binary); - PADDLE_ENFORCE(static_cast(fin), "Cannot open file %s", - filename.c_str()); + PADDLE_ENFORCE_EQ( + static_cast(fin), true, + platform::errors::InvalidArgument("OpTesterConfig cannot open file %s.", + filename.c_str())); Init(fin); } diff --git a/paddle/fluid/operators/jit/benchmark.cc b/paddle/fluid/operators/jit/benchmark.cc index 898f27f9afe..d65cdc6c150 100644 --- a/paddle/fluid/operators/jit/benchmark.cc +++ b/paddle/fluid/operators/jit/benchmark.cc @@ -136,7 +136,6 @@ void BenchAllImpls(const typename KernelTuple::attr_type& attr, Args... args) { } using Tensor = paddle::framework::Tensor; - template void BenchKernelXYZN() { using T = typename KernelTuple::data_type; @@ -320,8 +319,15 @@ void BenchKernelSgd() { const T lr = 0.1; auto UnDuplicatedRandomVec = [](int n, const int64_t lower, const int64_t upper) -> std::vector { - PADDLE_ENFORCE_LE(static_cast(upper - lower), n - 1); - PADDLE_ENFORCE_GT(n, 0); + PADDLE_ENFORCE_LE( + static_cast(upper - lower), n - 1, + paddle::platform::errors::InvalidArgument( + "The range of Sgd (upper - lower) should be equal to or lower " + "than n-1 (Sgd size -1). But upper - lower is %d and n-1 is %d.", + static_cast(upper - lower), (n - 1))); + PADDLE_ENFORCE_GT( + n, 0, paddle::platform::errors::InvalidArgument( + "The Sgd size should be larger than 0. 
But the n is %d.", n)); std::vector all, out; for (int i = 0; i < n; ++i) { all.push_back(i); diff --git a/paddle/fluid/operators/jit/gen/embseqpool.cc b/paddle/fluid/operators/jit/gen/embseqpool.cc index b4e63d87eac..c549fec0970 100644 --- a/paddle/fluid/operators/jit/gen/embseqpool.cc +++ b/paddle/fluid/operators/jit/gen/embseqpool.cc @@ -132,11 +132,31 @@ class EmbSeqPoolCreator : public JitCodeCreator { } std::unique_ptr CreateJitCode( const emb_seq_pool_attr_t& attr) const override { - PADDLE_ENFORCE_GT(attr.table_height, 0); - PADDLE_ENFORCE_GT(attr.table_width, 0); - PADDLE_ENFORCE_GT(attr.index_height, 0); - PADDLE_ENFORCE_GT(attr.index_width, 0); - PADDLE_ENFORCE_GT(attr.out_width, 0); + PADDLE_ENFORCE_GT(attr.table_height, 0, + platform::errors::InvalidArgument( + "The attribute table_height of EmbSeqPool should " + "be larger than 0. But it is %d.", + attr.table_height)); + PADDLE_ENFORCE_GT(attr.table_width, 0, + platform::errors::InvalidArgument( + "The attribute table_width of EmbSeqPool should " + "be larger than 0. But it is %d.", + attr.table_width)); + PADDLE_ENFORCE_GT(attr.index_height, 0, + platform::errors::InvalidArgument( + "The attribute index_height of EmbSeqPool should " + "be larger than 0. But it is %d.", + attr.index_height)); + PADDLE_ENFORCE_GT(attr.index_width, 0, + platform::errors::InvalidArgument( + "The attribute index_width of EmbSeqPool should " + "be larger than 0. But it is %d.", + attr.index_width)); + PADDLE_ENFORCE_GT(attr.out_width, 0, + platform::errors::InvalidArgument( + "The attribute out_width of EmbSeqPool should be " + "larger than 0. 
But it is %d.", + attr.out_width)); return make_unique(attr, CodeSize(attr)); } }; diff --git a/paddle/fluid/operators/jit/gen/matmul.cc b/paddle/fluid/operators/jit/gen/matmul.cc index 047d0d3e1ca..3139b252cad 100644 --- a/paddle/fluid/operators/jit/gen/matmul.cc +++ b/paddle/fluid/operators/jit/gen/matmul.cc @@ -29,7 +29,11 @@ void MatMulJitCode::genCode() { preCode(); int block, rest; const auto groups = packed_groups(n_, k_, &block, &rest); - PADDLE_ENFORCE_GT(groups.front(), 0); + PADDLE_ENFORCE_GT( + groups.front(), 0, + platform::errors::InvalidArgument("The number of rest registers should " + "be larger than 0. But it is %d.", + groups.front())); const int block_len = sizeof(float) * block; const int x_reg_idx = (block == ZMM_FLOAT_BLOCK ? 32 : 16) - 1; @@ -118,9 +122,21 @@ class MatMulCreator : public JitCodeCreator { } std::unique_ptr CreateJitCode( const matmul_attr_t& attr) const override { - PADDLE_ENFORCE_GT(attr.m, 0); - PADDLE_ENFORCE_GT(attr.n, 0); - PADDLE_ENFORCE_GT(attr.k, 0); + PADDLE_ENFORCE_GT( + attr.m, 0, platform::errors::InvalidArgument( + "The attribute m (first matrix's row) of MatMul should " + "be larger than 0. But it is %d.", + attr.m)); + PADDLE_ENFORCE_GT( + attr.n, 0, platform::errors::InvalidArgument( + "The attribute n (first matrix's col) of MatMul should " + "be larger than 0. But it is %d.", + attr.n)); + PADDLE_ENFORCE_GT( + attr.k, 0, platform::errors::InvalidArgument( + "The attribute k (second matrix's col) of MatMul should " + "be larger than 0. 
But it is %d.", + attr.k)); return make_unique(attr, CodeSize(attr)); } }; diff --git a/paddle/fluid/operators/jit/gen/matmul.h b/paddle/fluid/operators/jit/gen/matmul.h index 4f04f7606d2..eb7328d7e06 100644 --- a/paddle/fluid/operators/jit/gen/matmul.h +++ b/paddle/fluid/operators/jit/gen/matmul.h @@ -33,7 +33,10 @@ class MatMulJitCode : public JitCode { size_t code_size = 256 * 1024, void* code_ptr = nullptr) : JitCode(code_size, code_ptr), m_(attr.m), n_(attr.n), k_(attr.k) { - PADDLE_ENFORCE_EQ(m_, 1, "Only support m==1 yet"); + PADDLE_ENFORCE_EQ(m_, 1, platform::errors::Unimplemented( + "Jitcode of matmul only support m==1 (first " + "matrix's row) now. But m is %d.", + m_)); this->genCode(); } diff --git a/paddle/fluid/operators/jit/gen/seqpool.cc b/paddle/fluid/operators/jit/gen/seqpool.cc index ec8e4e98274..d8c7b3cdb7b 100644 --- a/paddle/fluid/operators/jit/gen/seqpool.cc +++ b/paddle/fluid/operators/jit/gen/seqpool.cc @@ -70,8 +70,14 @@ class SeqPoolCreator : public JitCodeCreator { } std::unique_ptr CreateJitCode( const seq_pool_attr_t& attr) const override { - PADDLE_ENFORCE_GT(attr.w, 0); - PADDLE_ENFORCE_GT(attr.h, 0); + PADDLE_ENFORCE_GT(attr.w, 0, platform::errors::InvalidArgument( + "The attribute width of SeqPool should " + "be larger than 0. But it is %d.", + attr.w)); + PADDLE_ENFORCE_GT(attr.h, 0, platform::errors::InvalidArgument( + "The attribute height of SeqPool should " + "be larger than 0. 
But it is %d.", + attr.h)); return make_unique(attr, CodeSize(attr)); } }; diff --git a/paddle/fluid/operators/jit/gen/seqpool.h b/paddle/fluid/operators/jit/gen/seqpool.h index cb562c4c9a6..d4e7b2e29ce 100644 --- a/paddle/fluid/operators/jit/gen/seqpool.h +++ b/paddle/fluid/operators/jit/gen/seqpool.h @@ -127,8 +127,13 @@ class SeqPoolJitCode : public JitCode { vmovss(xmm_t(reg_idx + max_num_regs), ptr[reg_ptr_src_i]); reg_idx++; } - PADDLE_ENFORCE_EQ(reg_idx, rest_used_num_regs, - "All heights should use same regs"); + PADDLE_ENFORCE_EQ( + reg_idx, rest_used_num_regs, + platform::errors::InvalidArgument( + "All heights of SeqPool should use the same number of registers." + "It equals to the numbr of rest registers. But use %d registers " + "and the numbr of rest registers is %d.", + reg_idx, rest_used_num_regs)); for (int i = 0; i < reg_idx; ++i) { vaddps(xmm_t(i), xmm_t(i), xmm_t(i + max_num_regs)); } diff --git a/paddle/fluid/operators/jit/gen/sgd.cc b/paddle/fluid/operators/jit/gen/sgd.cc index 1452d4139b0..7fe93fdb6a5 100644 --- a/paddle/fluid/operators/jit/gen/sgd.cc +++ b/paddle/fluid/operators/jit/gen/sgd.cc @@ -116,9 +116,24 @@ class SgdCreator : public JitCodeCreator { size_t CodeSize(const sgd_attr_t& attr) const override { return 96 + 32 * 8; } std::unique_ptr CreateJitCode( const sgd_attr_t& attr) const override { - PADDLE_ENFORCE_EQ(attr.param_width, attr.grad_width); - PADDLE_ENFORCE_LE(attr.selected_rows_size, attr.grad_height); - PADDLE_ENFORCE_GE(attr.selected_rows_size, 0); + PADDLE_ENFORCE_EQ(attr.param_width, attr.grad_width, + platform::errors::InvalidArgument( + "The attribute param_width of Sgd should be " + "equal to the attribute grad_width. But param_width " + "is %d and grad_width is %d.", + attr.param_width, attr.grad_width)); + PADDLE_ENFORCE_LE(attr.selected_rows_size, attr.grad_height, + platform::errors::InvalidArgument( + "The attribute selected_rows_size of Sgd should be " + "equal to or less than the attribute grad_height. 
" + "But selected_rows_size is %d and grad_height is %d.", + attr.selected_rows_size, attr.grad_height)); + PADDLE_ENFORCE_GE( + attr.selected_rows_size, 0, + platform::errors::InvalidArgument( + "The attribute selected_rows_size of Sgd should be " + "equal to or larger than 0. But selected_rows_size is %d.", + attr.selected_rows_size)); return make_unique(attr, CodeSize(attr)); } }; diff --git a/paddle/fluid/operators/jit/gen/vbroadcast.cc b/paddle/fluid/operators/jit/gen/vbroadcast.cc index 66a8d75fd4d..4084d68c2a8 100644 --- a/paddle/fluid/operators/jit/gen/vbroadcast.cc +++ b/paddle/fluid/operators/jit/gen/vbroadcast.cc @@ -76,7 +76,11 @@ class VBroadcastCreator : public JitCodeCreator { return 96 + (w / YMM_FLOAT_BLOCK) * 16 * 8; } std::unique_ptr CreateJitCode(const int64_t& w) const override { - PADDLE_ENFORCE_GT(w, 0); + PADDLE_ENFORCE_GT( + w, 0, + platform::errors::InvalidArgument( + "The width of VBroadcast should be larger than 0. But w is %d.", + w)); return make_unique(w, CodeSize(w)); } }; diff --git a/paddle/fluid/operators/jit/gen_base.cc b/paddle/fluid/operators/jit/gen_base.cc index 4c49eff49e3..2ae71256cdd 100644 --- a/paddle/fluid/operators/jit/gen_base.cc +++ b/paddle/fluid/operators/jit/gen_base.cc @@ -49,9 +49,14 @@ void GenBase::dumpCode(const unsigned char* code) const { void* GenBase::operator new(size_t size) { void* ptr; constexpr size_t alignment = 32ul; - PADDLE_ENFORCE_EQ(posix_memalign(&ptr, alignment, size), 0, - "GenBase Alloc %ld error!", size); - PADDLE_ENFORCE(ptr, "Fail to allocate GenBase CPU memory: size = %d .", size); + PADDLE_ENFORCE_EQ( + posix_memalign(&ptr, alignment, size), 0, + platform::errors::InvalidArgument( + "Jitcode generator (GenBase) allocate %ld memory error!", size)); + PADDLE_ENFORCE_NOT_NULL(ptr, platform::errors::InvalidArgument( + "Fail to allocate jitcode generator " + "(GenBase) CPU memory: size = %d .", + size)); return ptr; } diff --git a/paddle/fluid/operators/jit/helper.cc 
b/paddle/fluid/operators/jit/helper.cc index 2952cdb8714..c66e8092d5e 100644 --- a/paddle/fluid/operators/jit/helper.cc +++ b/paddle/fluid/operators/jit/helper.cc @@ -66,7 +66,8 @@ const char* to_string(KernelType kt) { ONE_CASE(kEmbSeqPool); ONE_CASE(kSgd); default: - PADDLE_THROW("Not support type: %d, or forget to add it.", kt); + PADDLE_THROW(platform::errors::Unimplemented( + "JIT kernel do not support type: %d.", kt)); return "NOT JITKernel"; } return nullptr; @@ -79,7 +80,8 @@ const char* to_string(SeqPoolType tp) { ONE_CASE(kAvg); ONE_CASE(kSqrt); default: - PADDLE_THROW("Not support type: %d, or forget to add it.", tp); + PADDLE_THROW(platform::errors::Unimplemented( + "SeqPool JIT kernel do not support type: %d.", tp)); return "NOT PoolType"; } return nullptr; @@ -100,7 +102,8 @@ KernelType to_kerneltype(const std::string& act) { } else if (lower == "tanh" || lower == "vtanh") { return kVTanh; } - PADDLE_THROW("Not support type: %s, or forget to add this case", act); + PADDLE_THROW(platform::errors::Unimplemented( + "Act JIT kernel do not support type: %s.", act)); return kNone; } @@ -109,12 +112,19 @@ void pack_weights(const float* src, float* dst, int n, int k) { int block, rest; const auto groups = packed_groups(n, k, &block, &rest); std::for_each(groups.begin(), groups.end(), [&](int i) { - PADDLE_ENFORCE_GT(i, 0, "each element of groups should be larger than 0."); + PADDLE_ENFORCE_GT(i, 0, platform::errors::InvalidArgument( + "Each element of groups should be larger than " + "0. However the element: %d doesn't satify.", + i)); }); int sum = std::accumulate(groups.begin(), groups.end(), 0); std::memset(dst, 0, k * sum * block * sizeof(float)); PADDLE_ENFORCE_GE(sum * block, n, - "The packed n should be equal to or larger than n"); + platform::errors::InvalidArgument( + "The packed n (sum * block) should be equal to or " + "larger than n (matmul row size). 
" + "However, the packed n is %d and n is %d.", + sum * block, n)); const int block_len = sizeof(float) * block; int n_offset = 0; @@ -136,7 +146,8 @@ void pack_weights(const float* src, float* dst, int n, int k) { template typename std::enable_if::value>::type pack_weights( const T* src, T* dst, int n, int k) { - PADDLE_THROW("Only support pack with float type."); + PADDLE_THROW(platform::errors::Unimplemented( + "Only supports pack weights with float type.")); } } // namespace jit diff --git a/paddle/fluid/operators/jit/helper.h b/paddle/fluid/operators/jit/helper.h index b6dd49b7772..0791bb58105 100644 --- a/paddle/fluid/operators/jit/helper.h +++ b/paddle/fluid/operators/jit/helper.h @@ -85,8 +85,10 @@ inline const Kernel* GetReferKernel() { auto& ref_pool = ReferKernelPool::Instance().AllKernels(); KernelKey kkey(KernelTuple::kernel_type, platform::CPUPlace()); auto ref_iter = ref_pool.find(kkey); - PADDLE_ENFORCE(ref_iter != ref_pool.end(), - "Every Kernel should have reference function."); + PADDLE_ENFORCE_NE( + ref_iter, ref_pool.end(), + platform::errors::PreconditionNotMet( + "Every Refer Kernel of jitcode should have reference function.")); auto& ref_impls = ref_iter->second; for (auto& impl : ref_impls) { auto i = dynamic_cast*>(impl.get()); @@ -101,7 +103,9 @@ template inline typename KernelTuple::func_type GetReferFunc() { auto ker = GetReferKernel(); auto p = dynamic_cast*>(ker); - PADDLE_ENFORCE(p, "The Refer kernel should exsit"); + PADDLE_ENFORCE_NOT_NULL(p, platform::errors::InvalidArgument( + "Get the reference code of kernel in CPU " + "failed. The Refer kernel should exsit.")); return p->GetFunc(); } @@ -132,7 +136,9 @@ std::vector GetAllCandidateKernels( // The last implementation should be reference function on CPUPlace. auto ref = GetReferKernel(); - PADDLE_ENFORCE(ref != nullptr, "Refer Kernel can not be empty."); + PADDLE_ENFORCE_NOT_NULL(ref, platform::errors::InvalidArgument( + "Get all candicate kernel in CPU failed. 
" + "The Refer Kernel can not be empty.")); res.emplace_back(ref); return res; } @@ -147,11 +153,14 @@ GetAllCandidateFuncsWithTypes(const typename KernelTuple::attr_type& attr) { std::string name = k->ImplType(); if (name == "JitCode") { auto i = dynamic_cast(k); - PADDLE_ENFORCE(i, "jitcode kernel cast can not fail."); + PADDLE_ENFORCE_NOT_NULL(i, + platform::errors::InvalidArgument( + "Generate jitcode kernel (GenBase) failed.")); res.emplace_back(std::make_pair(name, i->template getCode())); } else { auto i = dynamic_cast*>(k); - PADDLE_ENFORCE(i, "kernel cast can not fail."); + PADDLE_ENFORCE_NOT_NULL(i, platform::errors::InvalidArgument( + "Kernel cast (KernelMore) failed.")); res.emplace_back(std::make_pair(name, i->GetFunc())); } } @@ -173,7 +182,9 @@ template typename KernelTuple::func_type GetDefaultBestFunc( const typename KernelTuple::attr_type& attr) { auto funcs = GetAllCandidateFuncs(attr); - PADDLE_ENFORCE_GE(funcs.size(), 1UL); + PADDLE_ENFORCE_GE(funcs.size(), 1UL, + platform::errors::InvalidArgument( + "The candicate jit kernel is at least one in CPU.")); // Here could do some runtime benchmark of this attr and return the best one. // But yet just get the first one as the default best one, // which is searched in order and tuned by offline. 
diff --git a/paddle/fluid/operators/jit/more/mix/mix.cc b/paddle/fluid/operators/jit/more/mix/mix.cc index f5b7bfff898..5d63f4848e6 100644 --- a/paddle/fluid/operators/jit/more/mix/mix.cc +++ b/paddle/fluid/operators/jit/more/mix/mix.cc @@ -95,7 +95,8 @@ void (*getActFunc(KernelType type, int d))(const T*, T*, int) { // NOLINT } else if (type == kVIdentity) { return KernelFuncs, CPUPlace>::Cache().At(d); } - PADDLE_THROW("Not support type: %s", type); + PADDLE_THROW(platform::errors::Unimplemented( + "Act JIT kernel do not support type: %s", type)); return nullptr; } diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.h b/paddle/fluid/operators/jit/more/mkl/mkl.h index ee31c8df2f8..5f3c29ad5ef 100644 --- a/paddle/fluid/operators/jit/more/mkl/mkl.h +++ b/paddle/fluid/operators/jit/more/mkl/mkl.h @@ -103,11 +103,24 @@ void SeqPool(const T* x, T* y, const seq_pool_attr_t* attr) { template void EmbSeqPool(const T* table, const int64_t* idx, T* out, const emb_seq_pool_attr_t* attr) { - PADDLE_ENFORCE_EQ(attr->table_width * attr->index_width, attr->out_width); + PADDLE_ENFORCE_EQ( + attr->table_width * attr->index_width, attr->out_width, + platform::errors::InvalidArgument( + "The attribute table_width * index_width of EmbSeqPool should " + "be equal to out_width. But table_width * index_width is %d, " + "out_width is %d.", + attr->table_width * attr->index_width, attr->out_width)); auto check_idx_value_valid = [&](int64_t i) { - PADDLE_ENFORCE_LT(idx[i], attr->table_height, "idx value: %d, i: %d", - idx[i], i); - PADDLE_ENFORCE_GE(idx[i], 0, "idx value: %d, i: %d", idx[i], i); + PADDLE_ENFORCE_LT( + idx[i], attr->table_height, + platform::errors::InvalidArgument( + "The idx shoud be lower than the attribute table_height of " + "EmbSeqPool. But %dth of idx is %d and table_height is %d.", + i, idx[i], attr->table_height)); + PADDLE_ENFORCE_GE(idx[i], 0, platform::errors::InvalidArgument( + "The idx shoud be equal to or larger than " + "the 0. 
But %dth of idx is %d.", + i, idx[i])); }; for (int64_t w = 0; w != attr->index_width; ++w) { @@ -168,22 +181,50 @@ void Softmax(const T* x, T* y, int n, int bs, int remain = 1) { template void Sgd(const T* lr, const T* param, const T* grad, const int64_t* rows, T* out, const sgd_attr_t* attr) { - PADDLE_ENFORCE_EQ(attr->param_width, attr->grad_width); - PADDLE_ENFORCE_LE(attr->selected_rows_size, attr->grad_height); + PADDLE_ENFORCE_EQ(attr->param_width, attr->grad_width, + platform::errors::InvalidArgument( + "The attribute param_width of Sgd should be " + "equal to the attribute grad_width. But param_width " + "is %d and grad_width is %d.", + attr->param_width, attr->grad_width)); + PADDLE_ENFORCE_LE(attr->selected_rows_size, attr->grad_height, + platform::errors::InvalidArgument( + "The attribute selected_rows_size of Sgd should be " + "equal to or less than the attribute grad_height. " + "But selected_rows_size is %d and grad_height is %d.", + attr->selected_rows_size, attr->grad_height)); T scalar = -lr[0]; int width = attr->grad_width; if (out == param) { for (int64_t i = 0; i < attr->selected_rows_size; ++i) { auto h_idx = rows[i]; - PADDLE_ENFORCE_LT(h_idx, attr->param_height); - PADDLE_ENFORCE_GE(h_idx, 0); + PADDLE_ENFORCE_LT(h_idx, attr->param_height, + platform::errors::InvalidArgument( + "The rows of Sgd should be " + "less than the attribute. But %dth of rows " + "is %d and grad_width is %d.", + i, h_idx, attr->param_height)); + PADDLE_ENFORCE_GE(h_idx, 0, platform::errors::InvalidArgument( + "The rows of Sgd should be " + "larger than 0. But %dth of rows " + "is %d.", + i, h_idx)); VAXPY(scalar, grad + i * width, out + h_idx * width, width); } } else { for (int64_t i = 0; i < attr->selected_rows_size; ++i) { auto h_idx = rows[i]; - PADDLE_ENFORCE_LT(h_idx, attr->param_height); - PADDLE_ENFORCE_GE(h_idx, 0); + PADDLE_ENFORCE_LT(h_idx, attr->param_height, + platform::errors::InvalidArgument( + "The rows of Sgd should be " + "less than the attribute. 
But %dth of rows " + "is %d and grad_width is %d.", + i, h_idx, attr->param_height)); + PADDLE_ENFORCE_GE(h_idx, 0, platform::errors::InvalidArgument( + "The rows of Sgd should be " + "larger than 0. But %dth of rows " + "is %d.", + i, h_idx)); VScal(&scalar, grad + i * width, out + h_idx * width, width); VAdd(param + h_idx * width, out + h_idx * width, out + h_idx * width, width); diff --git a/paddle/fluid/operators/jit/refer/refer.h b/paddle/fluid/operators/jit/refer/refer.h index b8d5e2c2407..42fb7b4f279 100644 --- a/paddle/fluid/operators/jit/refer/refer.h +++ b/paddle/fluid/operators/jit/refer/refer.h @@ -147,7 +147,8 @@ void (*getActFunc(KernelType type))(const T*, T*, int) { // NOLINT } else if (type == kVIdentity) { return VIdentity; } - PADDLE_THROW("Not support type: %s", type); + PADDLE_THROW(platform::errors::Unimplemented( + "Act JIT kernel do not support type: %s.", type)); return nullptr; } @@ -465,12 +466,25 @@ void Softmax(const T* x, T* y, int n, int bs = 1, int remain = 1) { template void EmbSeqPool(const T* table, const int64_t* idx, T* out, const emb_seq_pool_attr_t* attr) { - PADDLE_ENFORCE_EQ(attr->table_width * attr->index_width, attr->out_width); + PADDLE_ENFORCE_EQ( + attr->table_width * attr->index_width, attr->out_width, + platform::errors::InvalidArgument( + "The attribute table_width * index_width of EmbSeqPool should " + "be equal to out_width. But table_width * index_width is %d and " + "out_width is %d.", + attr->table_width * attr->index_width, attr->out_width)); auto check_idx_value_valid = [&](int64_t i) { - PADDLE_ENFORCE_LT(idx[i], attr->table_height, "idx value: %d, i: %d", - idx[i], i); - PADDLE_ENFORCE_GE(idx[i], 0, "idx value: %d, i: %d", idx[i], i); + PADDLE_ENFORCE_LT( + idx[i], attr->table_height, + platform::errors::InvalidArgument( + "The idx shoud be lower than the attribute table_height of " + "EmbSeqPool. 
But %dth of idx is %d and table_height is %d.", + i, idx[i], attr->table_height)); + PADDLE_ENFORCE_GE(idx[i], 0, platform::errors::InvalidArgument( + "The idx shoud be equal to or larger than " + "the 0. But %dth of idx is %d.", + i, idx[i])); }; for (int64_t w = 0; w != attr->index_width; ++w) { @@ -505,12 +519,31 @@ void EmbSeqPool(const T* table, const int64_t* idx, T* out, template void Sgd(const T* lr, const T* param, const T* grad, const int64_t* rows, T* out, const sgd_attr_t* attr) { - PADDLE_ENFORCE_EQ(attr->param_width, attr->grad_width); - PADDLE_ENFORCE_LE(attr->selected_rows_size, attr->grad_height); + PADDLE_ENFORCE_EQ(attr->param_width, attr->grad_width, + platform::errors::InvalidArgument( + "The attribute param_width of Sgd should be " + "equal to the attribute grad_width. But param_width " + "is %d and grad_width is %d.", + attr->param_width, attr->grad_width)); + PADDLE_ENFORCE_LE(attr->selected_rows_size, attr->grad_height, + platform::errors::InvalidArgument( + "The attribute selected_rows_size of Sgd should be " + "equal to or less than the attribute grad_height. " + "But selected_rows_size is %d and grad_height is %d.", + attr->selected_rows_size, attr->grad_height)); for (int64_t i = 0; i < attr->selected_rows_size; ++i) { auto h_idx = rows[i]; - PADDLE_ENFORCE_LT(h_idx, attr->param_height); - PADDLE_ENFORCE_GE(h_idx, 0); + PADDLE_ENFORCE_LT(h_idx, attr->param_height, + platform::errors::InvalidArgument( + "The rows of Sgd should be " + "less than the attribute. But %dth of rows " + "is %d and grad_width is %d.", + i, h_idx, attr->param_height)); + PADDLE_ENFORCE_GE(h_idx, 0, platform::errors::InvalidArgument( + "The rows of Sgd should be " + "larger than 0. 
But %dth of rows " + "is %d.", + i, h_idx)); for (int64_t j = 0; j < attr->grad_width; ++j) { out[h_idx * attr->grad_width + j] = param[h_idx * attr->grad_width + j] - diff --git a/paddle/fluid/operators/jit/test.cc b/paddle/fluid/operators/jit/test.cc index eb56f111f08..0cc62720b87 100644 --- a/paddle/fluid/operators/jit/test.cc +++ b/paddle/fluid/operators/jit/test.cc @@ -850,8 +850,15 @@ void TestKernelSgd() { const T lr = 0.1; auto UnDuplicatedRandomVec = [](int n, const int64_t lower, const int64_t upper) -> std::vector { - PADDLE_ENFORCE_LE(static_cast(upper - lower), n - 1); - PADDLE_ENFORCE_GT(n, 0); + PADDLE_ENFORCE_LE(static_cast(upper - lower), n - 1, + paddle::platform::errors::InvalidArgument( + "The range of Sgd (upper - lower) should be lower " + "than n-1 (Sgd size -1). But the upper - lower is %d " + "and n-1 is %d.", + static_cast(upper - lower), n - 1)); + PADDLE_ENFORCE_GT( + n, 0, paddle::platform::errors::InvalidArgument( + "The Sgd size should be larger than 0. But the n is %d.", n)); std::vector all, out; for (int i = 0; i < n; ++i) { all.push_back(i); -- GitLab From fab4e6d08f55ccbd07749d5df8eaf4d0d760e1b4 Mon Sep 17 00:00:00 2001 From: Zhong Hui Date: Fri, 25 Sep 2020 14:49:48 +0800 Subject: [PATCH 096/117] add abs support double grad add abs support double grad for the api 2.0 --- paddle/fluid/operators/activation_op.cc | 42 +++++++++++++++++-- paddle/fluid/operators/activation_op.cu | 14 ++++++- paddle/fluid/operators/activation_op.h | 21 ++++++++++ .../unittests/test_activation_nn_grad.py | 24 +++++++++++ 4 files changed, 97 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index 5a3660cee85..95214484dca 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -763,10 +763,28 @@ class ActivationOpDoubleGrad2 : public framework::OperatorWithKernel { } }; -// +// AbsGrad: dx=dy if x >=0 else -dy +// AbsDoubleGrad: ddy = 
ddx if x >=0 else -ddx +template +class AbsDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker { + public: + using ::paddle::framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op) const override { + op->SetType("abs_grad_grad"); + // input1: x + op->SetInput("X", this->Input("X")); + // input2: ddx + op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X"))); + op->SetAttrMap(this->Attrs()); + // output: ddy + op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out"))); + } +}; + // ReluGrad: dx = dy if y >= 0 else 0 // ReluGradGrad: ddy = ddx if y >= 0 else 0 -// template class ReluDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker { public: @@ -1214,7 +1232,13 @@ REGISTER_OPERATOR( std::conditional>(), ops::ActFwdInplaceInferer, void>::type); REGISTER_OPERATOR(abs_grad, ops::ActivationOpGrad, - ops::ActivationGradOpInplaceInferer); + ops::ActivationGradOpInplaceInferer, + ops::AbsDoubleGradMaker, + ops::AbsDoubleGradMaker); +REGISTER_OPERATOR( + abs_grad_grad, + ops::ActivationOpDoubleGrad::FwdDeps()>, + ops::ActivationDoubleGradOpInplaceInferer); REGISTER_OP_CPU_KERNEL(abs, ops::ActivationKernel>, ops::ActivationGradKernel>); +REGISTER_OP_CPU_KERNEL( + abs_grad_grad, + ops::ActivationDoubleGradKernel>, + ops::ActivationDoubleGradKernel>, + ops::ActivationDoubleGradKernel>, + ops::ActivationDoubleGradKernel>, + ops::ActivationDoubleGradKernel>); /* ========================================================================== */ /* ========================== register checkpoint ===========================*/ diff --git a/paddle/fluid/operators/activation_op.cu b/paddle/fluid/operators/activation_op.cu index 48ec90471f0..072d952d261 100644 --- a/paddle/fluid/operators/activation_op.cu +++ b/paddle/fluid/operators/activation_op.cu @@ -160,7 +160,7 @@ REGISTER_OP_CUDA_KERNEL( ops::ExpGradFunctor>); /* ========================================================================== */ -/* 
========================== exp register ============================ */ +/* ========================== abs register ============================ */ REGISTER_OP_CUDA_KERNEL( abs, ops::ActivationKernel>, @@ -180,4 +180,16 @@ REGISTER_OP_CUDA_KERNEL( ops::AbsGradFunctor>, ops::ActivationGradKernel>); +REGISTER_OP_CUDA_KERNEL( + abs_grad_grad, + ops::ActivationDoubleGradKernel>, + ops::ActivationDoubleGradKernel>, + ops::ActivationDoubleGradKernel>, + ops::ActivationDoubleGradKernel>, + ops::ActivationDoubleGradKernel>); /* ========================================================================== */ diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h index 00a7c063c91..646f546bffb 100644 --- a/paddle/fluid/operators/activation_op.h +++ b/paddle/fluid/operators/activation_op.h @@ -1430,6 +1430,27 @@ class ActivationDoubleGradKernel } }; +template +struct AbsGradGradFunctor : public BaseActivationFunctor { + template + void operator()(const Device& dev, const framework::Tensor* X, + const framework::Tensor* Out, const framework::Tensor* ddX, + framework::Tensor* ddOut, framework::Tensor* dOut, + framework::Tensor* dX) const { + auto* d = dev.eigen_device(); + auto ddx = framework::EigenVector::Flatten( + GET_DATA_SAFELY(ddX, "Input", "DDX", "AbsGradGrad")); + auto x = framework::EigenVector::Flatten( + GET_DATA_SAFELY(X, "Input", "X", "AbsGradGrad")); + if (ddOut) { + auto ddout = framework::EigenVector::Flatten( + GET_DATA_SAFELY(ddOut, "Output", "DDOut", "AbsGradGrad")); + ddout.device(*d) = ddx * x.sign(); + } + } + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } +}; + template struct ReluGradGradFunctor : public BaseActivationFunctor { template diff --git a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py index db9e8d2c6bd..e8b8a45fb67 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py +++ 
b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py @@ -147,5 +147,29 @@ class TestSquareDoubleGradCheck(unittest.TestCase): self.func(p) +class TestAbsDoubleGradCheck(unittest.TestCase): + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified, not inlcude -1. + shape = [2, 3, 7, 9] + eps = 1e-6 + dtype = np.float64 + + x = layers.data('x', shape, False, dtype) + x.persistable = True + y = layers.abs(x) + x_arr = np.random.uniform(-1, 1, shape).astype(dtype) + + gradient_checker.double_grad_check( + [x], y, x_init=x_arr, place=place, eps=eps) + + def test_grad(self): + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + if __name__ == "__main__": unittest.main() -- GitLab From 6fc74bbaf614cf8501a812b7044191df8f21117d Mon Sep 17 00:00:00 2001 From: ShenLiang Date: Fri, 25 Sep 2020 15:18:35 +0800 Subject: [PATCH 097/117] add fp16 for matmul (#27523) * add fp16 for matmul --- paddle/fluid/operators/math/blas_impl.cu.h | 29 ++++++ paddle/fluid/operators/matmul_v2_op.cu | 10 +- paddle/fluid/operators/matmul_v2_op.h | 55 ++++++----- .../tests/unittests/test_matmul_v2_op.py | 99 ++++++++++++++----- python/paddle/tensor/linalg.py | 4 +- 5 files changed, 142 insertions(+), 55 deletions(-) diff --git a/paddle/fluid/operators/math/blas_impl.cu.h b/paddle/fluid/operators/math/blas_impl.cu.h index a0464cf70e2..aeafe22235c 100644 --- a/paddle/fluid/operators/math/blas_impl.cu.h +++ b/paddle/fluid/operators/math/blas_impl.cu.h @@ -420,6 +420,22 @@ void Blas::GEMV(bool trans_a, int M, int N, }); } +template <> +template <> +inline void Blas::GEMV( + bool trans_a, int M, int N, platform::float16 alpha, + const platform::float16 *A, const platform::float16 *B, + platform::float16 beta, platform::float16 *C) const { + // Because cublas doesn't support half gemv, we use cublasHgemm to achieve it. 
+ if (trans_a) { + this->template GEMM(CblasNoTrans, CblasNoTrans, 1, N, M, + alpha, B, A, beta, C); + } else { + this->template GEMM(CblasNoTrans, CblasNoTrans, M, 1, N, + alpha, A, B, beta, C); + } +} + template <> template void Blas::BatchedGEMM( @@ -479,6 +495,19 @@ void Blas::BatchedGEMM( } } +template <> +template <> +inline void Blas::BatchedGEMM( + CBLAS_TRANSPOSE transA, CBLAS_TRANSPOSE transB, int M, int N, int K, + platform::float16 alpha, const platform::float16 **A, + const platform::float16 **B, platform::float16 beta, platform::float16 **C, + int batchCount) const { + for (int k = 0; k < batchCount; ++k) { + this->template GEMM(transA, transB, M, N, K, alpha, A[k], + B[k], beta, C[k]); + } +} + template <> template void Blas::TRSM(CBLAS_SIDE side, CBLAS_UPLO uplo, diff --git a/paddle/fluid/operators/matmul_v2_op.cu b/paddle/fluid/operators/matmul_v2_op.cu index 64ec65a2341..91958513ddb 100644 --- a/paddle/fluid/operators/matmul_v2_op.cu +++ b/paddle/fluid/operators/matmul_v2_op.cu @@ -17,10 +17,12 @@ limitations under the License. 
*/ namespace ops = paddle::operators; namespace plf = paddle::platform; -REGISTER_OP_CUDA_KERNEL(matmul_v2, - ops::MatMulV2Kernel, - ops::MatMulV2Kernel); +REGISTER_OP_CUDA_KERNEL( + matmul_v2, ops::MatMulV2Kernel, + ops::MatMulV2Kernel, + ops::MatMulV2Kernel); REGISTER_OP_CUDA_KERNEL( matmul_v2_grad, ops::MatMulV2GradKernel, - ops::MatMulV2GradKernel); + ops::MatMulV2GradKernel, + ops::MatMulV2GradKernel); diff --git a/paddle/fluid/operators/matmul_v2_op.h b/paddle/fluid/operators/matmul_v2_op.h index 8cd4fa12be4..ee485bd1711 100644 --- a/paddle/fluid/operators/matmul_v2_op.h +++ b/paddle/fluid/operators/matmul_v2_op.h @@ -163,17 +163,20 @@ void MatMulFunction(const Tensor* X, const Tensor* Y, if (trans_y) { const int M = Y->numel() / N; VLOG(3) << "MatMul's case 2"; - blas.GEMV(false, M, N, 1., y_data, x_data, 0., Out->data()); + blas.GEMV(false, M, N, static_cast(1), y_data, x_data, + static_cast(0), Out->data()); } else { const int M = y_dims[y_ndim - 1]; const int batch_size = Y->numel() / (M * N); if (batch_size == 1) { VLOG(3) << "MatMul's case 3"; - blas.GEMV(true, N, M, 1., y_data, x_data, 0., Out->data()); + blas.GEMV(true, N, M, static_cast(1), y_data, x_data, + static_cast(0), Out->data()); } else { VLOG(3) << "MatMul's case 4"; - blas.BatchedGEMM(CblasTrans, CblasNoTrans, M, 1, N, 1.0f, y_data, - x_data, 0, Out->data(), batch_size, M * N, 0); + blas.BatchedGEMM(CblasTrans, CblasNoTrans, M, 1, N, static_cast(1), + y_data, x_data, static_cast(0), Out->data(), + batch_size, M * N, 0); } } return; @@ -205,16 +208,19 @@ void MatMulFunction(const Tensor* X, const Tensor* Y, const int batch_size = X->numel() / (M * N); if (batch_size == 1) { VLOG(3) << "MatMul's case 5"; - blas.GEMV(true, N, M, 1.0f, x_data, y_data, 0.0f, Out->data()); + blas.GEMV(true, N, M, static_cast(1), x_data, y_data, + static_cast(0), Out->data()); } else { VLOG(3) << "MatMul's case 6"; - blas.BatchedGEMM(CblasTrans, CblasNoTrans, M, 1, N, 1.0f, x_data, - y_data, 0, Out->data(), 
batch_size, M * N, 0); + blas.BatchedGEMM(CblasTrans, CblasNoTrans, M, 1, N, static_cast(1), + x_data, y_data, static_cast(0), Out->data(), + batch_size, M * N, 0); } } else { const int M = X->numel() / N; VLOG(3) << "MatMul's case 7"; - blas.GEMV(false, M, N, 1.0f, x_data, y_data, 0.0f, Out->data()); + blas.GEMV(false, M, N, static_cast(1), x_data, y_data, + static_cast(0), Out->data()); } return; } @@ -263,37 +269,38 @@ void MatMulFunction(const Tensor* X, const Tensor* Y, if (x_batch_size == 1 && y_batch_size == 1) { VLOG(3) << "MatMul's case 8"; blas.GEMM(trans_x ? CblasTrans : CblasNoTrans, - trans_y ? CblasTrans : CblasNoTrans, M, N, K, 1.0f, x_data, - y_data, 0.0f, Out->data()); + trans_y ? CblasTrans : CblasNoTrans, M, N, K, static_cast(1), + x_data, y_data, static_cast(0), Out->data()); } else if (x_batch_size == 1) { if (M == 1 && trans_y) { VLOG(3) << "MatMul's case 9"; - blas.GEMV(false, y_batch_size * N, K, 1.0f, y_data, x_data, 0.0f, - Out->data()); + blas.GEMV(false, y_batch_size * N, K, static_cast(1), y_data, x_data, + static_cast(0), Out->data()); } else { VLOG(3) << "MatMul's case 10"; blas.BatchedGEMM(trans_x ? CblasTrans : CblasNoTrans, - trans_y ? CblasTrans : CblasNoTrans, M, N, K, 1.0f, - x_data, y_data, 0, Out->data(), out_batch_size, 0, - K * N); + trans_y ? CblasTrans : CblasNoTrans, M, N, K, + static_cast(1), x_data, y_data, static_cast(0), + Out->data(), out_batch_size, 0, K * N); } } else if (y_batch_size == 1) { if (!trans_x) { VLOG(3) << "MatMul's case 11"; blas.GEMM(CblasNoTrans, trans_y ? CblasTrans : CblasNoTrans, - x_batch_size * M, N, K, 1.0f, x_data, y_data, 0.0f, - Out->data()); + x_batch_size * M, N, K, static_cast(1), x_data, y_data, + static_cast(0), Out->data()); } else { VLOG(3) << "MatMul's case 12"; blas.BatchedGEMM(CblasTrans, trans_y ? 
CblasTrans : CblasNoTrans, M, N, K, - 1.0f, x_data, y_data, 0, Out->data(), out_batch_size, - M * K, 0); + static_cast(1), x_data, y_data, static_cast(0), + Out->data(), out_batch_size, M * K, 0); } } else if (!is_broadcast_dims) { VLOG(3) << "MatMul's case 13"; blas.BatchedGEMM(trans_x ? CblasTrans : CblasNoTrans, - trans_y ? CblasTrans : CblasNoTrans, M, N, K, 1.0f, x_data, - y_data, 0, Out->data(), out_batch_size, M * K, K * N); + trans_y ? CblasTrans : CblasNoTrans, M, N, K, + static_cast(1), x_data, y_data, static_cast(0), + Out->data(), out_batch_size, M * K, K * N); } else { // in the case, can't use stridedgemm std::vector x_ptr(out_batch_size); @@ -314,9 +321,9 @@ void MatMulFunction(const Tensor* X, const Tensor* Y, } VLOG(3) << "MatMul's case 14"; blas.BatchedGEMM(trans_x ? CblasTrans : CblasNoTrans, - trans_y ? CblasTrans : CblasNoTrans, M, N, K, 1.0f, - x_ptr.data(), y_ptr.data(), 0.0f, out_ptr.data(), - out_batch_size); + trans_y ? CblasTrans : CblasNoTrans, M, N, K, + static_cast(1), x_ptr.data(), y_ptr.data(), + static_cast(0), out_ptr.data(), out_batch_size); } } diff --git a/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py b/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py index 884139a23d5..640771df23b 100644 --- a/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py @@ -65,15 +65,21 @@ class TestMatMulV2Op(OpTest): self.y_shape = (100, ) self.trans_x = False self.trans_y = False + + def init_kernel_type(self): self.dtype = "float64" def setUp(self): + self.init_kernel_type() self.config() self.op_type = "matmul_v2" x = np.random.random(self.x_shape).astype(self.dtype) y = np.random.random(self.y_shape).astype(self.dtype) + # -0.1 ~ 0.1 + x = -0.1 + 0.2 * x + y = -0.1 + 0.2 * y result = reference_matmul(x, y, self.trans_x, self.trans_y) - + result = result.astype(self.dtype) self.inputs = { 'X': x, 'Y': y, @@ -98,7 +104,6 @@ class TestMatMuklOp2(TestMatMulV2Op): 
self.y_shape = (1, 3, 2, 100) self.trans_x = False self.trans_y = True - self.dtype = "float64" class TestMatMuklOp3(TestMatMulV2Op): @@ -111,7 +116,6 @@ class TestMatMuklOp3(TestMatMulV2Op): self.y_shape = (1, 1, 100, 2) self.trans_x = False self.trans_y = False - self.dtype = "float64" class TestMatMuklOp4(TestMatMulV2Op): @@ -124,7 +128,6 @@ class TestMatMuklOp4(TestMatMulV2Op): self.y_shape = (1, 2, 100, 2) self.trans_x = False self.trans_y = False - self.dtype = "float64" class TestMatMuklOp5(TestMatMulV2Op): @@ -133,11 +136,10 @@ class TestMatMuklOp5(TestMatMulV2Op): """ def config(self): - self.x_shape = (1, 1, 100, 2) + self.x_shape = (1, 1, 100, 1) self.y_shape = (100, ) self.trans_x = True self.trans_y = False - self.dtype = "float64" class TestMatMuklOp6(TestMatMulV2Op): @@ -150,7 +152,6 @@ class TestMatMuklOp6(TestMatMulV2Op): self.y_shape = (100, ) self.trans_x = True self.trans_y = False - self.dtype = "float64" class TestMatMuklOp7(TestMatMulV2Op): @@ -163,7 +164,6 @@ class TestMatMuklOp7(TestMatMulV2Op): self.y_shape = (100, ) self.trans_x = False self.trans_y = False - self.dtype = "float64" class TestMatMuklOp8(TestMatMulV2Op): @@ -176,7 +176,6 @@ class TestMatMuklOp8(TestMatMulV2Op): self.y_shape = (1, 1, 100, 2) self.trans_x = False self.trans_y = False - self.dtype = "float64" class TestMatMuklOp9(TestMatMulV2Op): @@ -189,7 +188,6 @@ class TestMatMuklOp9(TestMatMulV2Op): self.y_shape = (2, 1, 2, 100) self.trans_x = False self.trans_y = True - self.dtype = "float64" class TestMatMuklOp10(TestMatMulV2Op): @@ -198,11 +196,10 @@ class TestMatMuklOp10(TestMatMulV2Op): """ def config(self): - self.x_shape = (1, 1, 2, 100) - self.y_shape = (1, 2, 100, 2) + self.x_shape = (1, 1, 25, 4) + self.y_shape = (1, 2, 4, 25) self.trans_x = False self.trans_y = False - self.dtype = "float64" class TestMatMuklOp11(TestMatMulV2Op): @@ -215,7 +212,6 @@ class TestMatMuklOp11(TestMatMulV2Op): self.y_shape = (1, 1, 100, 2) self.trans_x = False self.trans_y = False - 
self.dtype = "float64" class TestMatMuklOp12(TestMatMulV2Op): @@ -224,11 +220,10 @@ class TestMatMuklOp12(TestMatMulV2Op): """ def config(self): - self.x_shape = (2, 1, 100, 2) - self.y_shape = (1, 1, 100, 2) + self.x_shape = (2, 1, 4, 25) + self.y_shape = (1, 1, 4, 25) self.trans_x = True self.trans_y = False - self.dtype = "float64" class TestMatMuklOp13(TestMatMulV2Op): @@ -237,11 +232,10 @@ class TestMatMuklOp13(TestMatMulV2Op): """ def config(self): - self.x_shape = (2, 2, 100, 2) - self.y_shape = (2, 2, 100, 2) + self.x_shape = (2, 2, 2, 50) + self.y_shape = (2, 2, 2, 50) self.trans_x = True self.trans_y = False - self.dtype = "float64" class TestMatMuklOp14(TestMatMulV2Op): @@ -254,7 +248,6 @@ class TestMatMuklOp14(TestMatMulV2Op): self.y_shape = (1, 2, 2, 100, 2) self.trans_x = True self.trans_y = False - self.dtype = "float64" class TestMatMuklOp15(TestMatMulV2Op): @@ -267,7 +260,6 @@ class TestMatMuklOp15(TestMatMulV2Op): self.y_shape = (1, 2, 2, 100, 1) self.trans_x = False self.trans_y = False - self.dtype = "float64" class TestMatMuklOp16(TestMatMulV2Op): @@ -277,10 +269,9 @@ class TestMatMuklOp16(TestMatMulV2Op): def config(self): self.x_shape = (100) - self.y_shape = (1, 2, 2, 100, 1) + self.y_shape = (1, 2, 2, 100, 2) self.trans_x = False self.trans_y = False - self.dtype = "float64" class TestMatMuklOp17(TestMatMulV2Op): @@ -293,7 +284,54 @@ class TestMatMuklOp17(TestMatMulV2Op): self.y_shape = (100) self.trans_x = False self.trans_y = False - self.dtype = "float64" + + +#--------------------test matmul fp16-------------------- + + +def create_test_fp16_class(parent, atol=0.001, max_relative_error=1.0): + @unittest.skipIf(not core.is_compiled_with_cuda(), + "core is not compiled with CUDA") + class TestMatMulOpFp16Case(parent): + def init_kernel_type(self): + self.dtype = np.float16 + + def test_check_output(self): + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + if core.is_float16_supported(place): + 
self.check_output_with_place(place, atol=atol) + + def test_check_grad(self): + place = core.CUDAPlace(0) + if core.is_float16_supported(place): + self.check_grad_with_place( + place, ['X', 'Y'], + 'Out', + max_relative_error=max_relative_error) + + cls_name = "{0}_{1}".format(parent.__name__, "Fp16") + TestMatMulOpFp16Case.__name__ = cls_name + globals()[cls_name] = TestMatMulOpFp16Case + + +create_test_fp16_class(TestMatMulV2Op) +create_test_fp16_class(TestMatMuklOp2) +create_test_fp16_class(TestMatMuklOp3) +create_test_fp16_class(TestMatMuklOp4) +create_test_fp16_class(TestMatMuklOp5) +create_test_fp16_class(TestMatMuklOp6) +create_test_fp16_class(TestMatMuklOp7) +create_test_fp16_class(TestMatMuklOp8) +create_test_fp16_class(TestMatMuklOp9) +create_test_fp16_class(TestMatMuklOp10) +create_test_fp16_class(TestMatMuklOp11) +create_test_fp16_class(TestMatMuklOp12) +create_test_fp16_class(TestMatMuklOp13) +create_test_fp16_class(TestMatMuklOp14) +create_test_fp16_class(TestMatMuklOp15) +create_test_fp16_class(TestMatMuklOp16) +create_test_fp16_class(TestMatMuklOp17) class TestMatMulV2API(unittest.TestCase): @@ -331,6 +369,17 @@ class TestMatMulV2API(unittest.TestCase): y = paddle.to_tensor(input_y) result = paddle.matmul(x, y) + def test_dygraph_fp16(self): + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + if core.is_float16_supported(place): + with fluid.dygraph.guard(place): + input_x = np.random.random([4, 3]).astype("float16") + input_y = np.random.random([3, 4]).astype("float16") + x = paddle.to_tensor(input_x) + y = paddle.to_tensor(input_y) + result = paddle.matmul(x, y) + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py index f27cfba487d..26624d3b5ff 100644 --- a/python/paddle/tensor/linalg.py +++ b/python/paddle/tensor/linalg.py @@ -156,8 +156,8 @@ def matmul(x, y, transpose_x=False, transpose_y=False, name=None): def __check_input(x, y): var_names = {'x': x, 'y': y} for 
name, val in var_names.items(): - check_variable_and_dtype(val, name, ['float32', 'float64'], - 'matmul') + check_variable_and_dtype( + val, name, ['float16', 'float32', 'float64'], 'matmul') __check_input(x, y) -- GitLab From c143326df5cf397451f1f4b931c955742c3e7f16 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Fri, 25 Sep 2020 15:47:30 +0800 Subject: [PATCH 098/117] try to fix test_paddle_save_load unknown timeout (#27536) * try to fix paddle save load test * open paddle save load * replace dataloader * remove dataloader --- .../fluid/tests/unittests/CMakeLists.txt | 3 -- .../tests/unittests/test_paddle_save_load.py | 49 +++++++++---------- 2 files changed, 23 insertions(+), 29 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 2f8952a4431..09797576801 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -335,9 +335,6 @@ list(REMOVE_ITEM TEST_OPS test_conv3d_transpose_op) # disable this unittest temporarily list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_exception) list(REMOVE_ITEM TEST_OPS test_sampling_id_op) -list(REMOVE_ITEM TEST_OPS test_paddle_save_load) - - if (APPLE OR WIN32) list(REMOVE_ITEM TEST_OPS test_dataset) diff --git a/python/paddle/fluid/tests/unittests/test_paddle_save_load.py b/python/paddle/fluid/tests/unittests/test_paddle_save_load.py index 74d44d0f8b6..fee34945586 100644 --- a/python/paddle/fluid/tests/unittests/test_paddle_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_paddle_save_load.py @@ -29,19 +29,23 @@ IMAGE_SIZE = 784 CLASS_NUM = 10 -# define a random dataset -class RandomDataset(paddle.io.Dataset): - def __init__(self, num_samples): - self.num_samples = num_samples - - def __getitem__(self, idx): +def random_batch_reader(): + def _get_random_inputs_and_labels(): np.random.seed(SEED) - image = np.random.random([IMAGE_SIZE]).astype('float32') - label = 
np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64') + image = np.random.random([BATCH_SIZE, IMAGE_SIZE]).astype('float32') + label = np.random.randint(0, CLASS_NUM - 1, ( + BATCH_SIZE, + 1, )).astype('int64') return image, label - def __len__(self): - return self.num_samples + def __reader__(): + for _ in range(BATCH_NUM): + batch_image, batch_label = _get_random_inputs_and_labels() + batch_image = paddle.to_tensor(batch_image) + batch_label = paddle.to_tensor(batch_label) + yield batch_image, batch_label + + return __reader__ class LinearNet(nn.Layer): @@ -66,8 +70,7 @@ def train(layer, loader, loss_fn, opt): class TestSaveLoad(unittest.TestCase): def setUp(self): # enable dygraph mode - self.place = paddle.CPUPlace() - paddle.disable_static(self.place) + paddle.disable_static() # config seed paddle.manual_seed(SEED) @@ -81,14 +84,8 @@ class TestSaveLoad(unittest.TestCase): adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters()) # create data loader - dataset = RandomDataset(BATCH_NUM * BATCH_SIZE) - loader = paddle.io.DataLoader( - dataset, - places=self.place, - batch_size=BATCH_SIZE, - shuffle=True, - drop_last=True, - num_workers=2) + # TODO: using new DataLoader cause unknown Timeout on windows, replace it + loader = random_batch_reader() # train train(layer, loader, loss_fn, adam) @@ -103,8 +100,8 @@ class TestSaveLoad(unittest.TestCase): layer, opt = self.build_and_train_model() # save - layer_save_path = "linear.pdparams" - opt_save_path = "linear.pdopt" + layer_save_path = "test_paddle_save_load.linear.pdparams" + opt_save_path = "test_paddle_save_load.linear.pdopt" layer_state_dict = layer.state_dict() opt_state_dict = opt.state_dict() @@ -120,7 +117,7 @@ class TestSaveLoad(unittest.TestCase): # test save load in static mode paddle.enable_static() - static_save_path = "static_mode_test/linear.pdparams" + static_save_path = "static_mode_test/test_paddle_save_load.linear.pdparams" paddle.save(layer_state_dict, static_save_path) 
load_static_state_dict = paddle.load(static_save_path) self.check_load_state_dict(layer_state_dict, load_static_state_dict) @@ -133,15 +130,15 @@ class TestSaveLoad(unittest.TestCase): # 2. test save path format error with self.assertRaises(ValueError): - paddle.save(layer_state_dict, "linear.model/") + paddle.save(layer_state_dict, "test_paddle_save_load.linear.model/") # 3. test load path not exist error with self.assertRaises(ValueError): - paddle.load("linear.params") + paddle.load("test_paddle_save_load.linear.params") # 4. test load old save path error with self.assertRaises(ValueError): - paddle.load("linear") + paddle.load("test_paddle_save_load.linear") if __name__ == '__main__': -- GitLab From 8daccc9ea7dbabec034882575b3738cf5c4c1dcc Mon Sep 17 00:00:00 2001 From: ceci3 Date: Fri, 25 Sep 2020 16:25:49 +0800 Subject: [PATCH 099/117] Fix batch norm double grad compute (#27549) * fix bn double grad, test=develop * update, test=develop --- paddle/fluid/operators/batch_norm_op.cc | 55 ++++++++------ paddle/fluid/operators/instance_norm_op.cc | 6 +- paddle/fluid/operators/norm_utils.cu.h | 75 ++++++++++++++----- .../tests/unittests/test_norm_nn_grad.py | 36 +++++++++ 4 files changed, 131 insertions(+), 41 deletions(-) diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index dcfe8bb1bb4..7a88403aa9d 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -839,6 +839,7 @@ void BatchNormDoubleGradMaker::Apply(GradOpPtr op) const { op->SetInput("SavedMean", this->Input("SavedMean")); op->SetInput("SavedVariance", this->Input("SavedVariance")); if (BOOST_GET_CONST(bool, this->GetAttr("use_global_stats"))) { + op->SetInput("Mean", this->Input("Mean")); op->SetInput("Variance", this->Input("Variance")); } op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X"))); @@ -868,14 +869,19 @@ void BatchNormDoubleGradOp::InferShape( "BatchNormDoubleGrad"); } - 
OP_INOUT_CHECK(ctx->HasInput("DDX"), "Input", "DDX", "BatchNormDoubleGrad"); OP_INOUT_CHECK(ctx->HasInput("DY"), "Input", "DY", "BatchNormDoubleGrad"); // check output OP_INOUT_CHECK(ctx->HasOutput("DX"), "Output", "DX", "BatchNormDoubleGrad"); const auto x_dims = ctx->GetInputDim("X"); - const int C = x_dims[1]; + const DataLayout data_layout = framework::StringToDataLayout( + ctx->Attrs().Get("data_layout")); + const int C = + ((this->IsMKLDNNType() == true) || (data_layout == DataLayout::kNCHW) + ? x_dims[1] + : x_dims[x_dims.size() - 1]); + if (ctx->HasOutput("DX")) { ctx->SetOutputDim("DX", x_dims); } @@ -957,7 +963,9 @@ class BatchNormDoubleGradKernel Tensor inv_var_tensor; if (use_global_stats) { + const auto *running_mean = ctx.Input("Mean"); const auto *running_variance = ctx.Input("Variance"); + mean_data = running_mean->data(); inv_var_tensor.Resize({C}); T *running_inv_var_data = inv_var_tensor.mutable_data(ctx.GetPlace()); @@ -1077,12 +1085,12 @@ class BatchNormDoubleGradKernel // (np.mean(dy, axis=(n,h,w)) - dy) + inv_var.pow(3) / NxHxW * // np.sum(dy, // axis=(n,h,w)) * (x - mean) * - // (np.mean(ddx, axis=(n,h,w)) - ddx) + ddr * (dy * inv_var - + // (np.mean(ddx, axis=(n,h,w)) - ddx)) + ddr * (dy * inv_var - // inv_var // * // np.mean(dy, axis=(n,h,w)) - // inv_var.pow(3) * (x - mean) * np.mean(dy * (x - mean), - // axis=(n,h,w)))) + // axis=(n,h,w))) if (ddX) { dx_arr += @@ -1176,7 +1184,8 @@ class BatchNormDoubleGradKernel C, sample_size); ddy_arr.setZero(); if (use_global_stats) { - // math: ddy = r * ddx * inv_var + // math: ddy = r * ddx * inv_var + ddbias + + // ddscale * (x - mean) * inv_var if (ddX) { ddy_arr = scale_tile_data * ddx_arr * inv_var_tile_data; } @@ -1196,25 +1205,29 @@ class BatchNormDoubleGradKernel .replicate(1, sample_size) / sample_size); } - if (ddScale && ddBias) { - ConstEigenVectorArrayMap ddscale_arr(ddScale->data(), C); - Tensor ddscale_tile; - ddscale_tile.Resize({C, sample_size}); - EigenArrayMap ddscale_tile_data( 
- ddscale_tile.mutable_data(ctx.GetPlace()), C, sample_size); - ddscale_tile_data = ddscale_arr.replicate(1, sample_size); + } + if (ddScale) { + ConstEigenVectorArrayMap ddscale_arr(ddScale->data(), C); + Tensor ddscale_tile; + ddscale_tile.Resize({C, sample_size}); + EigenArrayMap ddscale_tile_data( + ddscale_tile.mutable_data(ctx.GetPlace()), C, sample_size); + ddscale_tile_data = ddscale_arr.replicate(1, sample_size); + + ddy_arr += x_sub_mean_mul_invstd_arr * ddscale_tile_data; + } - ConstEigenVectorArrayMap ddbias_arr(ddBias->data(), C); - Tensor ddbias_tile; - ddbias_tile.Resize({C, sample_size}); - EigenArrayMap ddbias_tile_data( - ddbias_tile.mutable_data(ctx.GetPlace()), C, sample_size); - ddbias_tile_data = ddbias_arr.replicate(1, sample_size); + if (ddBias) { + ConstEigenVectorArrayMap ddbias_arr(ddBias->data(), C); + Tensor ddbias_tile; + ddbias_tile.Resize({C, sample_size}); + EigenArrayMap ddbias_tile_data( + ddbias_tile.mutable_data(ctx.GetPlace()), C, sample_size); + ddbias_tile_data = ddbias_arr.replicate(1, sample_size); - ddy_arr += x_sub_mean_mul_invstd_arr * ddscale_tile_data; - ddy_arr += ddbias_tile_data; - } + ddy_arr += ddbias_tile_data; } + if (data_layout == DataLayout::kNCHW) { VLOG(3) << "Transform batchnorm output from NHWC to NCHW"; TransToChannelFirst( diff --git a/paddle/fluid/operators/instance_norm_op.cc b/paddle/fluid/operators/instance_norm_op.cc index a5b270c1dfe..03279a9b2c1 100644 --- a/paddle/fluid/operators/instance_norm_op.cc +++ b/paddle/fluid/operators/instance_norm_op.cc @@ -520,11 +520,11 @@ class InstanceNormDoubleGradKernel // (np.mean(dy, axis=(h,w)) - dy) + inv_var.pow(3) / HxW * // np.sum(dy, // axis=(h,w)) * (x - mean) * - // (np.mean(ddx, axis=(h,w)) - ddx) + ddr * (dy * inv_var - inv_var - // * + // (np.mean(ddx, axis=(h,w)) - ddx)) + ddr * (dy * inv_var - + // inv_var * // np.mean(dy, axis=(h,w)) - // inv_var.pow(3) * (x - mean) * np.mean(dy * (x - mean), - // axis=(h,w)))) + // axis=(h,w))) Tensor 
x_sub_mean_mul_invstd; x_sub_mean_mul_invstd.Resize({sample_size, NxC}); diff --git a/paddle/fluid/operators/norm_utils.cu.h b/paddle/fluid/operators/norm_utils.cu.h index 07333f1ae11..02dcb4045f4 100644 --- a/paddle/fluid/operators/norm_utils.cu.h +++ b/paddle/fluid/operators/norm_utils.cu.h @@ -40,12 +40,12 @@ using DataLayout = framework::DataLayout; // (np.mean(dy, axis=(n,h,w)) - dy) + inv_var.pow(3) / NxHxW * // np.sum(dy, // axis=(n,h,w)) * (x - mean) * -// (np.mean(ddx, axis=(n,h,w)) - ddx) + ddr * (dy * inv_var - +// (np.mean(ddx, axis=(n,h,w)) - ddx)) + ddr * (dy * inv_var - // inv_var // * // np.mean(dy, axis=(n,h,w)) - // inv_var.pow(3) * (x - mean) * np.mean(dy * (x - mean), -// axis=(n,h,w)))) +// axis=(n,h,w))) template __global__ void DoubleGradComputeDX(const T *x, const T *mean, @@ -138,7 +138,7 @@ __global__ void DoubleGradComputeDX(const T *x, const T *mean, ? (j / sample_size * C + i) * sample_size + j % sample_size : j * outer_size + i; dx[index] += (dy[index] * var_val - dy_sum_val / inner_size * var_val - - (x[index] - mean_val) * var_val * + (x[index] - mean_val) * var_val * var_val * dy_mul_x_sub_mean_sum_val * var_val / inner_size) * ddscale[i]; } @@ -326,19 +326,57 @@ __global__ void DoubleGradComputeDScaleWithGlobal( } // math: dx = ddscale * dy * inv_var -// math: ddy = scale * ddx * inv_var template -__global__ void DoubleGradComputeDataWithGlobal( - const T *dy, const T *scale, const T *variance, const double epsilon, - const int C, const int sample_size, const int num, T *dx) { +__global__ void DoubleGradComputeDXWithGlobal(const T *dy, const T *ddscale, + const T *variance, + const double epsilon, const int C, + const int sample_size, + const int num, T *dx) { int gid = blockIdx.x * blockDim.x + threadIdx.x; int stride = blockDim.x * gridDim.x; - if (scale != nullptr) { + if (ddscale != nullptr) { for (int i = gid; i < num; i += stride) { const int c = layout == framework::DataLayout::kNCHW ? 
i / sample_size % C : i % C; T inv_var = 1.0 / sqrt(variance[c] + epsilon); - dx[i] = dy[i] * scale[c] * inv_var; + dx[i] = dy[i] * ddscale[c] * inv_var; + } + } +} + +// math: ddy = scale * ddx * inv_var + ddbias + +// ddscale * (x - mean) * inv_var +template +__global__ void DoubleGradComputeDDYWithGlobal( + const T *ddx, const T *scale, const T *mean, const T *variance, const T *x, + const T *ddbias, const T *ddscale, const double epsilon, const int C, + const int sample_size, const int num, T *ddy) { + int gid = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + + if (ddx != nullptr) { + for (int i = gid; i < num; i += stride) { + const int c = + layout == framework::DataLayout::kNCHW ? i / sample_size % C : i % C; + T inv_var = 1.0 / sqrt(variance[c] + epsilon); + ddy[i] += ddx[i] * scale[c] * inv_var; + } + } + __syncthreads(); + if (ddscale != nullptr) { + for (int i = gid; i < num; i += stride) { + const int c = + layout == framework::DataLayout::kNCHW ? i / sample_size % C : i % C; + T inv_var = 1.0 / sqrt(variance[c] + epsilon); + ddy[i] += (x[i] - mean[c]) * inv_var * ddscale[c]; + } + } + __syncthreads(); + if (ddbias != nullptr) { + for (int i = gid; i < num; i += stride) { + const int c = + layout == framework::DataLayout::kNCHW ? 
i / sample_size % C : i % C; + ddy[i] += ddbias[c]; } } } @@ -383,8 +421,11 @@ void NormDoubleGradFunctor(const framework::ExecutionContext &ctx, const T *mean_data, *variance_data; if (use_global_stats) { + const auto *running_mean = ctx.Input("Mean"); const auto *running_var = ctx.Input("Variance"); + const auto *running_mean_data = running_mean->template data(); const auto *running_var_data = running_var->template data(); + mean_data = running_mean_data; variance_data = running_var_data; } else { const T *smean_data = Saved_mean->data(); @@ -398,12 +439,12 @@ void NormDoubleGradFunctor(const framework::ExecutionContext &ctx, set_constant(dev_ctx, dX, static_cast(0)); if (use_global_stats) { if (data_layout == DataLayout::kNHWC) { - DoubleGradComputeDataWithGlobal< + DoubleGradComputeDXWithGlobal< T, DataLayout::kNHWC><<>>( dy_data, ddscale_data, variance_data, epsilon, C, sample_size, num, dx_data); } else { - DoubleGradComputeDataWithGlobal< + DoubleGradComputeDXWithGlobal< T, DataLayout::kNCHW><<>>( dy_data, ddscale_data, variance_data, epsilon, C, sample_size, num, dx_data); @@ -456,15 +497,15 @@ void NormDoubleGradFunctor(const framework::ExecutionContext &ctx, set_constant(dev_ctx, ddY, static_cast(0)); if (use_global_stats) { if (data_layout == DataLayout::kNHWC) { - DoubleGradComputeDataWithGlobal< + DoubleGradComputeDDYWithGlobal< T, DataLayout::kNHWC><<>>( - ddx_data, scale_data, variance_data, epsilon, C, sample_size, num, - ddy_data); + ddx_data, scale_data, mean_data, variance_data, x_data, ddbias_data, + ddscale_data, epsilon, C, sample_size, num, ddy_data); } else { - DoubleGradComputeDataWithGlobal< + DoubleGradComputeDDYWithGlobal< T, DataLayout::kNCHW><<>>( - ddx_data, scale_data, variance_data, epsilon, C, sample_size, num, - ddy_data); + ddx_data, scale_data, mean_data, variance_data, x_data, ddbias_data, + ddscale_data, epsilon, C, sample_size, num, ddy_data); } } else { if (data_layout == DataLayout::kNHWC) { diff --git 
a/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py b/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py index a89b9fde7f9..cb4bd16ce21 100644 --- a/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py @@ -130,5 +130,41 @@ class TestBatchNormDoubleGradCheckCase4(TestBatchNormDoubleGradCheck): self.shape = [2, 2, 3, 4, 5] +class TestBatchNormDoubleGradCheckCase5(TestBatchNormDoubleGradCheck): + @prog_scope() + def func(self, place): + prog = fluid.Program() + with fluid.program_guard(prog): + np.random.seed() + dtype = "float32" + eps = 0.005 + atol = 2e-4 + chn = self.shape[1] if self.data_layout == 'NCHW' else self.shape[ + -1] + x = layers.create_parameter(dtype=dtype, shape=self.shape, name='x') + z = fluid.layers.batch_norm( + input=x, + data_layout=self.data_layout, + use_global_stats=self.use_global_stats) + x_arr = np.random.uniform(-1, 1, self.shape).astype(dtype) + w, b = prog.global_block().all_parameters()[1:3] + w_arr = np.ones(chn).astype(dtype) + b_arr = np.zeros(chn).astype(dtype) + gradient_checker.double_grad_check( + [x, w, b], + z, + x_init=[x_arr, w_arr, b_arr], + atol=atol, + place=place, + eps=eps) + + +class TestBatchNormDoubleGradCheckCase6(TestBatchNormDoubleGradCheckCase5): + def init_test(self): + self.data_layout = 'NCHW' + self.use_global_stats = True + self.shape = [2, 3, 4, 5] + + if __name__ == "__main__": unittest.main() -- GitLab From 6f69a4cb059119176f556a0aac0253d2899c6b59 Mon Sep 17 00:00:00 2001 From: Thunderbrook <52529258+Thunderbrook@users.noreply.github.com> Date: Fri, 25 Sep 2020 16:39:22 +0800 Subject: [PATCH 100/117] add xpu in heter mode (#27000) * add xpu in heter mode test=develop * BOOST_CONST_GET; PADDLE_THROW test=develop * code style test=develop * code style test=develop * code style test=develop * refine test=develop * refine test=develop * refine test=develop * refine code test=develop --- cmake/third_party.cmake | 8 +- 
paddle/fluid/framework/device_worker.h | 5 +- paddle/fluid/framework/fleet/fleet_wrapper.cc | 50 +++++++ paddle/fluid/framework/fleet/fleet_wrapper.h | 8 ++ paddle/fluid/framework/fleet/heter_wrapper.cc | 59 ++++++-- paddle/fluid/framework/heterxpu_trainer.cc | 127 ++++++++++++++++-- paddle/fluid/framework/pull_dense_worker.cc | 20 ++- paddle/fluid/framework/trainer.h | 17 ++- paddle/fluid/framework/trainer_factory.cc | 3 +- python/paddle/fluid/executor.py | 2 +- 10 files changed, 268 insertions(+), 31 deletions(-) diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake index ffd32cc78f0..1eb2096af91 100644 --- a/cmake/third_party.cmake +++ b/cmake/third_party.cmake @@ -270,6 +270,10 @@ if(WITH_PSLIB) endif() endif(WITH_PSLIB) +if(NOT WIN32 AND NOT APPLE) + include(external/gloo) + list(APPEND third_party_deps extern_gloo) +endif() if(WITH_BOX_PS) include(external/box_ps) @@ -277,10 +281,6 @@ if(WITH_BOX_PS) endif(WITH_BOX_PS) if(WITH_DISTRIBUTE) - if(WITH_GLOO) - include(external/gloo) - list(APPEND third_party_deps extern_gloo) - endif() if(WITH_GRPC) list(APPEND third_party_deps extern_grpc) diff --git a/paddle/fluid/framework/device_worker.h b/paddle/fluid/framework/device_worker.h index ee2ef9a0c3d..f6f3098613b 100644 --- a/paddle/fluid/framework/device_worker.h +++ b/paddle/fluid/framework/device_worker.h @@ -74,7 +74,9 @@ class PullDenseWorker { virtual void Initialize(const TrainerDesc& param); #ifdef PADDLE_WITH_CUDA void AddStream(const cudaStream_t stream) { copy_streams_.push_back(stream); } +#endif +#if (defined PADDLE_WITH_CUDA) || (defined PADDLE_WITH_XPU) void AddPlace(const paddle::platform::Place place) { places_.push_back(place); } @@ -135,9 +137,9 @@ class PullDenseWorker { #ifdef PADDLE_WITH_CUDA std::vector copy_streams_; +#endif std::vector places_; std::vector thread_scopes_; -#endif }; // should incorporate different type of device @@ -161,6 +163,7 @@ class DeviceWorker { virtual void SetDataFeed(DataFeed* data_feed); virtual void 
SetWorkerNum(int num) {} virtual void CacheProgram(const ProgramDesc& main_program) {} + virtual void GetXpuOpIndex() {} virtual void SetNeedDumpField(bool need_dump_field) { need_dump_field_ = need_dump_field; } diff --git a/paddle/fluid/framework/fleet/fleet_wrapper.cc b/paddle/fluid/framework/fleet/fleet_wrapper.cc index 3c076805932..693073d1fc7 100644 --- a/paddle/fluid/framework/fleet/fleet_wrapper.cc +++ b/paddle/fluid/framework/fleet/fleet_wrapper.cc @@ -745,7 +745,57 @@ void FleetWrapper::PushDenseVarsAsync( push_sparse_status->push_back(std::move(status)); } } +#endif + +#ifdef PADDLE_WITH_XPU +void FleetWrapper::PushDenseVarsAsync( + const Scope& scope, const uint64_t table_id, + const std::vector& var_names, + std::vector<::std::future>* push_sparse_status, + float scale_datanorm, int batch_size, + const paddle::platform::Place& place) { +#ifdef PADDLE_WITH_PSLIB + std::vector regions; + for (auto& t : var_names) { + Variable* var = scope.FindVar(t); + LoDTensor* tensor = var->GetMutable(); + int count = tensor->numel(); + float* g_data = tensor->data(); + + Variable* pin_var = scope.FindVar(t + "pin"); + LoDTensor* pin_tensor = pin_var->GetMutable(); + float* pin_g = + pin_tensor->mutable_data(tensor->dims(), platform::CPUPlace()); + memory::Copy(platform::CPUPlace(), pin_g, + BOOST_GET_CONST(platform::XPUPlace, place), g_data, + sizeof(float) * count); + + float* g = pin_g; + if (scale_datanorm >= 0) { + if (t.find(".batch_size@GRAD") != std::string::npos || + t.find(".batch_sum@GRAD") != std::string::npos) { + Eigen::Map mat(g, 1, count); + float scale = 1.0 / batch_size; + mat *= scale; + } else if (t.find(".batch_square_sum@GRAD") != std::string::npos) { + VLOG(3) << "epsilon: " << scale_datanorm; + for (int i = 0; i < count; ++i) { + g[i] = (g[i] - batch_size * scale_datanorm) / batch_size + + batch_size * scale_datanorm; + } + } + } + paddle::ps::Region reg(g, count); + regions.emplace_back(std::move(reg)); + } + auto status = 
pslib_ptr_->_worker_ptr->push_dense(regions.data(), + regions.size(), table_id); + if (push_sparse_status) { + push_sparse_status->push_back(std::move(status)); + } +#endif +} #endif void FleetWrapper::PushDenseVarsAsync( const Scope& scope, const uint64_t table_id, diff --git a/paddle/fluid/framework/fleet/fleet_wrapper.h b/paddle/fluid/framework/fleet/fleet_wrapper.h index be87bdf1e75..ae86835f38d 100644 --- a/paddle/fluid/framework/fleet/fleet_wrapper.h +++ b/paddle/fluid/framework/fleet/fleet_wrapper.h @@ -160,6 +160,14 @@ class FleetWrapper { float scale_datanorm, int batch_size, const paddle::platform::Place& place, cudaStream_t stream, cudaEvent_t event); +#endif +#ifdef PADDLE_WITH_XPU + void PushDenseVarsAsync( + const Scope& scope, const uint64_t table_id, + const std::vector& var_names, + std::vector<::std::future>* push_sparse_status, + float scale_datanorm, int batch_size, + const paddle::platform::Place& place); #endif void PushDenseVarsAsync( const Scope& scope, const uint64_t table_id, diff --git a/paddle/fluid/framework/fleet/heter_wrapper.cc b/paddle/fluid/framework/fleet/heter_wrapper.cc index 7a27b6a9d7a..8e232560ab6 100644 --- a/paddle/fluid/framework/fleet/heter_wrapper.cc +++ b/paddle/fluid/framework/fleet/heter_wrapper.cc @@ -113,30 +113,66 @@ void HeterWrapper::SerializeToReq(const std::string& varname, Scope* scope, if (platform::is_cpu_place(tensor->place())) { memcpy(data_ptr, tensor->data(), tensor->numel() * SizeOfType(tensor->type())); -#ifdef PADDLE_WITH_CUDA } else { +#ifdef PADDLE_WITH_CUDA memory::Copy(platform::CPUPlace(), data_ptr, BOOST_GET_CONST(platform::CUDAPlace, tensor->place()), tensor->data(), tensor->numel() * SizeOfType(tensor->type()), nullptr); - } -#else - } #endif +#ifdef PADDLE_WITH_XPU + memory::Copy(platform::CPUPlace(), data_ptr, + BOOST_GET_CONST(platform::XPUPlace, tensor->place()), + tensor->data(), + tensor->numel() * SizeOfType(tensor->type())); +#endif + } } -// void 
HeterWrapper::DeSerializeToTensor(Scope* scope, -// const HeterRequest* request) { #ifdef PADDLE_WITH_CUDA void HeterWrapper::DeSerializeToTensor(Scope* scope, const VariableMessage& req_var, platform::Place place, cudaStream_t stream) { + // const VariableMessage& req_var = request->vars(); + auto* var = scope->FindVar(req_var.varname()); + auto* tensor = var->GetMutable(); + + std::vector vec_dim; + for (auto& x : req_var.dims()) { + vec_dim.push_back(x); + } + tensor->Resize(make_ddim(vec_dim)); + + LoD lod; + for (int i = 0; i < req_var.lod_level(); ++i) { + framework::Vector v; + for (int j = 0; j < req_var.lod(i).lod_data_size(); ++j) { + v.push_back(req_var.lod(i).lod_data(j)); + } + lod.push_back(v); + } + tensor->set_lod(lod); + + void* tensor_data = + tensor->mutable_data(place, ToVarType(req_var.data_type())); + +#ifdef PADDLE_WITH_CUDA + memory::Copy(BOOST_GET_CONST(platform::CUDAPlace, place), tensor_data, + platform::CPUPlace(), req_var.data().data(), + tensor->numel() * SizeOfType(tensor->type()), stream); #else + memcpy(tensor_data, req_var.data().data(), + tensor->numel() * SizeOfType(tensor->type())); +#endif +} +#endif + +// void HeterWrapper::DeSerializeToTensor(Scope* scope, +// const HeterRequest* request) { void HeterWrapper::DeSerializeToTensor(Scope* scope, const VariableMessage& req_var, platform::Place place) { -#endif // const VariableMessage& req_var = request->vars(); auto* var = scope->FindVar(req_var.varname()); auto* tensor = var->GetMutable(); @@ -160,10 +196,10 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope, void* tensor_data = tensor->mutable_data(place, ToVarType(req_var.data_type())); -#ifdef PADDLE_WITH_CUDA - memory::Copy(BOOST_GET_CONST(platform::CUDAPlace, place), tensor_data, +#ifdef PADDLE_WITH_XPU + memory::Copy(BOOST_GET_CONST(platform::XPUPlace, place), tensor_data, platform::CPUPlace(), req_var.data().data(), - tensor->numel() * SizeOfType(tensor->type()), stream); + tensor->numel() * 
SizeOfType(tensor->type())); #else memcpy(tensor_data, req_var.data().data(), tensor->numel() * SizeOfType(tensor->type())); @@ -184,7 +220,8 @@ framework::proto::VarType::Type HeterWrapper::ToVarType( case VariableMessage::BOOL: return framework::proto::VarType::BOOL; // NOLINT default: - VLOG(0) << "Not support type " << type; + PADDLE_THROW(platform::errors::InvalidArgument( + "ToVarType:Unsupported type %d", type)); } } diff --git a/paddle/fluid/framework/heterxpu_trainer.cc b/paddle/fluid/framework/heterxpu_trainer.cc index fbed74800b4..6bbbaacdde3 100644 --- a/paddle/fluid/framework/heterxpu_trainer.cc +++ b/paddle/fluid/framework/heterxpu_trainer.cc @@ -12,9 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#if (defined PADDLE_WITH_CUDA) && (defined PADDLE_WITH_PSLIB) +#include +#include +#include +#include +#include "io/fs.h" +#include "paddle/fluid/framework/data_feed_factory.h" +#include "paddle/fluid/framework/data_set.h" +#include "paddle/fluid/framework/device_worker_factory.h" +#include "paddle/fluid/framework/fleet/fleet_wrapper.h" +#include "paddle/fluid/framework/trainer.h" +#if (defined PADDLE_WITH_CUDA || defined PADDLE_WITH_XPU) && \ + (defined PADDLE_WITH_PSLIB) +#ifdef PADDLE_WITH_CUDA #include "paddle/fluid/platform/cuda_device_guard.h" - +#endif namespace paddle { namespace framework { @@ -34,6 +46,7 @@ void HeterXpuTrainer::Initialize(const TrainerDesc& trainer_desc, int place_num = trainer_desc.worker_places_size(); for (int i = 0; i < place_num; ++i) { int num = trainer_desc.worker_places(i); +#ifdef PADDLE_WITH_CUDA platform::CUDAPlace place = platform::CUDAPlace(num); platform::CUDADeviceGuard guard(place.device); cudaStream_t stream; @@ -44,6 +57,11 @@ void HeterXpuTrainer::Initialize(const TrainerDesc& trainer_desc, PADDLE_ENFORCE_CUDA_SUCCESS( cudaEventCreateWithFlags(&event, 
cudaEventDisableTiming)); events_.push_back(event); +#endif +#ifdef PADDLE_WITH_XPU + platform::XPUPlace place = platform::XPUPlace(num); + places_.push_back(place); +#endif } // thread_num_ = trainer_desc.thread_num(); // SetDataset(dataset); @@ -95,11 +113,17 @@ void HeterXpuTrainer::Initialize(const TrainerDesc& trainer_desc, void HeterXpuTrainer::CreateThreadParam(const ProgramDesc& program, int num) { auto place = places_[num]; Scope* scope = place_scopes_[num]; +#ifdef PADDLE_WITH_CUDA auto stream = copy_streams_[num]; auto event = events_[num]; - auto dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device; platform::CUDADeviceGuard guard(dev_id); +#endif + +#ifdef PADDLE_WITH_XPU + xpu_set_device(BOOST_GET_CONST(platform::XPUPlace, place).device); +#endif + auto& block = program.Block(0); for (auto& var : block.AllVars()) { if (var->Persistable()) { @@ -116,13 +140,28 @@ void HeterXpuTrainer::CreateThreadParam(const ProgramDesc& program, int num) { HeterMemCpy(thread_tensor, root_tensor, place, stream); \ } \ } while (0) + +#define HeterMemcpyXpuFunc(cpp_type, proto_type) \ + do { \ + if (root_tensor->type() == proto_type) { \ + HeterMemCpy(thread_tensor, root_tensor, place); \ + } \ + } while (0) +#ifdef PADDLE_WITH_CUDA _ForEachDataType_(HeterMemcpyFunc); +#endif +#ifdef PADDLE_WITH_XPU + _ForEachDataType_(HeterMemcpyXpuFunc); +#endif } } +#ifdef PADDLE_WITH_CUDA PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventRecord(event, stream)); cudaEventSynchronize(event); +#endif } +#ifdef PADDLE_WITH_CUDA template void HeterXpuTrainer::HeterMemCpy(LoDTensor* thread_tensor, LoDTensor* root_tensor, @@ -141,6 +180,27 @@ void HeterXpuTrainer::HeterMemCpy(LoDTensor* thread_tensor, root_ptr, sizeof(T) * root_tensor->numel(), stream); } } +#endif + +#ifdef PADDLE_WITH_XPU +template +void HeterXpuTrainer::HeterMemCpy(LoDTensor* thread_tensor, + LoDTensor* root_tensor, + const paddle::platform::Place& thread_place) { + T* thread_ptr = + 
thread_tensor->mutable_data(root_tensor->dims(), thread_place); + T* root_ptr = root_tensor->data(); + if (platform::is_cpu_place(root_tensor->place())) { + memory::Copy(BOOST_GET_CONST(platform::XPUPlace, thread_place), thread_ptr, + platform::CPUPlace(), root_ptr, + sizeof(T) * root_tensor->numel()); + } else { + memory::Copy(BOOST_GET_CONST(platform::XPUPlace, thread_place), thread_ptr, + BOOST_GET_CONST(platform::XPUPlace, root_tensor->place()), + root_ptr, sizeof(T) * root_tensor->numel()); + } +} +#endif void HeterXpuTrainer::DumpWork(int tid) {} @@ -171,13 +231,16 @@ void HeterXpuTrainer::InitOtherEnv(const ProgramDesc& main_program) { CreateThreadParam(main_program, i); pull_dense_worker_->AddThreadScope(scope); pull_dense_worker_->AddPlace(places_[i]); +#ifdef PADDLE_WITH_CUDA pull_dense_worker_->AddStream(copy_streams_[i]); +#endif } - pull_dense_worker_->Start(); +#ifdef PADDLE_WITH_CUDA for (auto& stream : copy_streams_) { cudaStreamSynchronize(stream); } +#endif op_names_.clear(); for (auto& op_desc : block.AllOps()) { std::unique_ptr local_op = OpRegistry::CreateOp(*op_desc); @@ -220,10 +283,12 @@ void HeterXpuTrainer::InitOtherEnv(const ProgramDesc& main_program) { OperatorBase* local_op_ptr = local_op.release(); (context->ops_).push_back(local_op_ptr); } +#ifdef PADDLE_WITH_CUDA auto dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device; platform::CUDADeviceGuard guard(dev_id); PADDLE_ENFORCE_CUDA_SUCCESS( cudaEventCreateWithFlags(&context->event_, cudaEventDisableTiming)); +#endif object_pool_.Push(context); } } @@ -267,12 +332,25 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request, } \ } while (0) _ForEachDataType_(MergeCallback); - if (platform::is_gpu_place(thread_tensor->place())) { + if (!platform::is_cpu_place(thread_tensor->place())) { +#ifdef PADDLE_WITH_CUDA auto dev_id = BOOST_GET_CONST(platform::CUDAPlace, thread_tensor->place()).device; platform::CUDADeviceGuard guard(dev_id); cudaMemset(thread_tensor->data(), 0, 
thread_tensor->numel() * SizeOfType(thread_tensor->type())); +#endif +#ifdef PADDLE_WITH_XPU + auto place = thread_tensor->place(); + xpu_set_device(BOOST_GET_CONST(platform::XPUPlace, place).device); + platform::DeviceContextPool& pool = + platform::DeviceContextPool::Instance(); + platform::DeviceContext* dev_ctx = pool.Get(place); + const platform::XPUDeviceContext* xpu_ctx = + reinterpret_cast(dev_ctx); + xpu::memset(xpu_ctx->x_context(), thread_tensor->data(), 0, + thread_tensor->numel() * SizeOfType(thread_tensor->type())); +#endif } else { memset(thread_tensor->data(), 0, thread_tensor->numel() * SizeOfType(thread_tensor->type())); @@ -281,12 +359,25 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request, auto* merge_var = response->add_vars(); heter_ptr_->SerializeToReq(need_merge_var_names_[i], root_scope_, merge_var); - if (platform::is_gpu_place(root_tensor->place())) { + if (!platform::is_cpu_place(root_tensor->place())) { +#ifdef PADDLE_WITH_CUDA auto dev_id = BOOST_GET_CONST(platform::CUDAPlace, root_tensor->place()).device; platform::CUDADeviceGuard guard(dev_id); cudaMemset(root_tensor->data(), 0, root_tensor->numel() * SizeOfType(root_tensor->type())); +#endif +#ifdef PADDLE_WITH_XPU + auto place = root_tensor->place(); + xpu_set_device(BOOST_GET_CONST(platform::XPUPlace, place).device); + platform::DeviceContextPool& pool = + platform::DeviceContextPool::Instance(); + platform::DeviceContext* dev_ctx = pool.Get(place); + const platform::XPUDeviceContext* xpu_ctx = + reinterpret_cast(dev_ctx); + xpu::memset(xpu_ctx->x_context(), root_tensor->data(), 0, + root_tensor->numel() * SizeOfType(root_tensor->type())); +#endif } else { memset(root_tensor->data(), 0, root_tensor->numel() * SizeOfType(root_tensor->type())); @@ -346,11 +437,12 @@ int HeterXpuTrainer::RunTask(const HeterRequest* request, OperatorBase* local_op_ptr = local_op.release(); (context->ops_).push_back(local_op_ptr); } - +#ifdef PADDLE_WITH_CUDA auto dev_id = 
BOOST_GET_CONST(platform::CUDAPlace, place).device; platform::CUDADeviceGuard guard(dev_id); PADDLE_ENFORCE_CUDA_SUCCESS( cudaEventCreateWithFlags(&context->event_, cudaEventDisableTiming)); +#endif } context->Reset(); @@ -359,15 +451,22 @@ int HeterXpuTrainer::RunTask(const HeterRequest* request, auto deserial_timer = std::make_shared("xpu_service_deserial"); for (int i = 0; i < request->vars_size(); ++i) { +#ifdef PADDLE_WITH_CUDA heter_ptr_->DeSerializeToTensor(context->scope_, request->vars(i), place, copy_streams_[context->place_num_]); +#endif +#ifdef PADDLE_WITH_XPU + heter_ptr_->DeSerializeToTensor(context->scope_, request->vars(i), place); +#endif } +#ifdef PADDLE_WITH_CUDA PADDLE_ENFORCE_CUDA_SUCCESS( cudaEventRecord(context->event_, copy_streams_[context->place_num_])); while (cudaEventQuery(context->event_) != cudaSuccess) { VLOG(3) << "wait for kernel"; bthread_yield(); } +#endif } { @@ -378,6 +477,7 @@ int HeterXpuTrainer::RunTask(const HeterRequest* request, op->Run(*(context->scope_), place); } } +#ifdef PADDLE_WITH_CUDA auto* dev_ctx = static_cast( platform::DeviceContextPool::Instance().Get(place)); PADDLE_ENFORCE_CUDA_SUCCESS( @@ -391,6 +491,10 @@ int HeterXpuTrainer::RunTask(const HeterRequest* request, bthread_yield(); } } +#endif +#ifdef PADDLE_WITH_XPU + xpu_wait(); +#endif for (int i = 0; i < trainer_desc_.xpu_send_list_size(); ++i) { const std::string& varname = trainer_desc_.xpu_send_list(i); @@ -407,11 +511,19 @@ int HeterXpuTrainer::RunTask(const HeterRequest* request, ++i) { uint64_t tid = static_cast(param_.program_config(0).push_dense_table_id(i)); +#ifdef PADDLE_WITH_CUDA fleet_ptr_->PushDenseVarsAsync( *(context->scope_), tid, dense_grad_names_[tid], &(context->push_dense_status_), scale_datanorm_, request->cur_batch(), places_[context->place_num_], copy_streams_[context->place_num_], context->event_); +#endif +#ifdef PADDLE_WITH_XPU + fleet_ptr_->PushDenseVarsAsync( + *(context->scope_), tid, dense_grad_names_[tid], + 
&(context->push_dense_status_), scale_datanorm_, request->cur_batch(), + places_[context->place_num_]); +#endif } for (int i = 0; i < param_.program_config(0).push_dense_table_id_size(); ++i) { @@ -453,7 +565,6 @@ void HeterXpuTrainer::Finalize() { pull_dense_worker_->Stop(); root_scope_->DropKids(); } - } // namespace framework } // namespace paddle #endif diff --git a/paddle/fluid/framework/pull_dense_worker.cc b/paddle/fluid/framework/pull_dense_worker.cc index c399c5d02eb..6aeef8a39b5 100644 --- a/paddle/fluid/framework/pull_dense_worker.cc +++ b/paddle/fluid/framework/pull_dense_worker.cc @@ -62,13 +62,15 @@ void PullDenseWorker::Initialize(const TrainerDesc& param) { fleet_ptr_ = FleetWrapper::GetInstance(); #ifdef PADDLE_WITH_CUDA copy_streams_.clear(); +#endif +#if (defined PADDLE_WITH_CUDA) || (defined PADDLE_WITH_XPU) places_.clear(); thread_scopes_.clear(); #endif } void PullDenseWorker::CreatePinVar() { -#ifdef PADDLE_WITH_CUDA +#if (defined PADDLE_WITH_CUDA) || (defined PADDLE_WITH_PSLIB) // for (auto& v : dense_value_names_) { // for (auto& name : v.second) { for (int i = 0; i < dwp_param_.program_config(0).pull_dense_table_id_size(); @@ -83,8 +85,13 @@ void PullDenseWorker::CreatePinVar() { auto* ptr = root_scope_->Var(name + "pin"); InitializeVariable(ptr, proto::VarType::LOD_TENSOR); LoDTensor* pin_tensor = ptr->GetMutable(); +#ifdef PADDLE_WITH_CUDA pin_tensor->mutable_data(tensor->dims(), platform::CUDAPinnedPlace()); +#endif +#ifdef PADDLE_WITH_XPU + pin_tensor->mutable_data(tensor->dims(), platform::CPUPlace()); +#endif } } #endif @@ -107,7 +114,7 @@ void PullDenseWorker::Wait(std::vector<::std::future>* status_vec) { exit(-1); } status_vec->resize(0); -#ifdef PADDLE_WITH_CUDA +#if (defined PADDLE_WITH_CUDA) || (defined PADDLE_WITH_XPU) for (size_t i = 0; i < places_.size(); ++i) { // for (auto& v : dense_value_names_) { @@ -125,9 +132,16 @@ void PullDenseWorker::Wait(std::vector<::std::future>* status_vec) { Variable* var = 
thread_scopes_[i]->FindVar(name); LoDTensor* tensor = var->GetMutable(); float* w = tensor->data(); +#ifdef PADDLE_WITH_CUDA memory::Copy(BOOST_GET_CONST(platform::CUDAPlace, places_[i]), w, platform::CUDAPinnedPlace(), pin_w, sizeof(float) * tensor->numel(), copy_streams_[i]); +#endif +#ifdef PADDLE_WITH_XPU + memory::Copy(BOOST_GET_CONST(platform::XPUPlace, places_[i]), w, + platform::CPUPlace(), pin_w, + sizeof(float) * tensor->numel()); +#endif } } } @@ -148,7 +162,7 @@ void PullDenseWorker::PullDense(bool force_update) { uint64_t tid = static_cast( dwp_param_.program_config(0).pull_dense_table_id(i)); if (force_update || CheckUpdateParam(tid)) { -#ifdef PADDLE_WITH_CUDA +#if (defined PADDLE_WITH_CUDA) || (defined PADDLE_WITH_XPU) VLOG(3) << "pull dense " << force_update << " " << tid; fleet_ptr_->PullDenseVarsAsync(*root_scope_, tid, dense_value_names_[tid], &pull_dense_status_, false); diff --git a/paddle/fluid/framework/trainer.h b/paddle/fluid/framework/trainer.h index d041ef48e2c..ecaec49aa46 100644 --- a/paddle/fluid/framework/trainer.h +++ b/paddle/fluid/framework/trainer.h @@ -138,7 +138,8 @@ class DistMultiTrainer : public MultiTrainer { std::shared_ptr pull_dense_worker_; }; -#if (defined PADDLE_WITH_CUDA) && (defined PADDLE_WITH_PSLIB) +#if (defined PADDLE_WITH_CUDA || defined PADDLE_WITH_XPU) && \ + (defined PADDLE_WITH_PSLIB) class HeterServiceContext { public: HeterServiceContext() {} @@ -151,7 +152,9 @@ class HeterServiceContext { void Reset() { push_dense_status_.clear(); } int place_num_; Scope* scope_{nullptr}; +#ifdef PADDLE_WITH_CUDA cudaEvent_t event_; +#endif std::vector ops_; std::vector<::std::future> push_dense_status_; }; @@ -178,10 +181,18 @@ class HeterXpuTrainer : public TrainerBase { virtual void CacheProgram(const ProgramDesc& main_program) { new (&program_) ProgramDesc(main_program); } + virtual std::string GetDumpPath(int tid) { return ""; } + virtual void InitDumpEnv() {} template +#ifdef PADDLE_WITH_CUDA void 
HeterMemCpy(LoDTensor* tensor, LoDTensor* root_tensor, const paddle::platform::Place& thread_place, cudaStream_t stream); +#endif +#ifdef PADDLE_WITH_XPU + void HeterMemCpy(LoDTensor* thread_tensor, LoDTensor* root_tensor, + const paddle::platform::Place& thread_place); +#endif void CreateThreadParam(const ProgramDesc& program, int num); template void MergeToRootScope(LoDTensor* root_tensor, LoDTensor* thread_tensor); @@ -207,9 +218,11 @@ class HeterXpuTrainer : public TrainerBase { std::vector op_names_; std::vector place_scopes_; BtObjectPool object_pool_; - std::vector copy_streams_; std::vector places_; +#ifdef PADDLE_WITH_CUDA + std::vector copy_streams_; std::vector events_; +#endif }; #endif diff --git a/paddle/fluid/framework/trainer_factory.cc b/paddle/fluid/framework/trainer_factory.cc index 15584620d86..cc92c50cc42 100644 --- a/paddle/fluid/framework/trainer_factory.cc +++ b/paddle/fluid/framework/trainer_factory.cc @@ -63,7 +63,8 @@ std::shared_ptr TrainerFactory::CreateTrainer( REGISTER_TRAINER_CLASS(MultiTrainer); REGISTER_TRAINER_CLASS(DistMultiTrainer); -#if (defined PADDLE_WITH_CUDA) && (defined PADDLE_WITH_PSLIB) +#if (defined PADDLE_WITH_CUDA || defined PADDLE_WITH_XPU) && \ + (defined PADDLE_WITH_PSLIB) REGISTER_TRAINER_CLASS(HeterXpuTrainer); #endif #if defined(PADDLE_WITH_NCCL) diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index 2e3f34f4164..3dc30767e5a 100644 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -1355,7 +1355,7 @@ class Executor(object): if not program._fleet_opt is None: if program._fleet_opt.get("worker_class", "") == "HeterCpuWorker": is_heter = 1 - if program._fleet_opt("trainer", "") == "HeterXpuTrainer": + if program._fleet_opt.get("trainer", "") == "HeterXpuTrainer": is_heter = 1 if scope is None: scope = global_scope() -- GitLab From 09f1953296232d4b2f1ad823fb060a8ed3b2eaa9 Mon Sep 17 00:00:00 2001 From: chalsliu <45041955+chalsliu@users.noreply.github.com> 
Date: Fri, 25 Sep 2020 16:45:02 +0800 Subject: [PATCH 101/117] Revert "Disable ut quickly." This reverts commit 29f1560d8fbb1e516dfac5c609e6e869196475a5. --- paddle/scripts/paddle_build.sh | 5 ---- tools/check_file_diff_approvals.sh | 2 +- tools/is_ut_disabled.py | 40 ------------------------------ 3 files changed, 1 insertion(+), 46 deletions(-) delete mode 100644 tools/is_ut_disabled.py diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index ac6531a2cc5..69303013d2a 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -988,11 +988,6 @@ set +x fi read testcase <<< $(echo "$line"|grep -oEi "\w+$") - if python $PADDLE_ROOT/tools/is_ut_disabled.py $testcase; then - echo $testcase" is disabled." - continue - fi - if [[ "$is_nightly" != "" ]] && [ ${NIGHTLY_MODE:-OFF} == "OFF" ]; then echo $testcase" will only run at night." continue diff --git a/tools/check_file_diff_approvals.sh b/tools/check_file_diff_approvals.sh index 16e61d7c77a..84254cc89bb 100644 --- a/tools/check_file_diff_approvals.sh +++ b/tools/check_file_diff_approvals.sh @@ -286,7 +286,7 @@ fi # Get the list of PR authors with unresolved unit test issues pip install PyGithub # For getting PR related data -wget https://sys-p0.bj.bcebos.com/blk/block.txt --no-check-certificate +wget https://paddle-ci.gz.bcebos.com/blk/block.txt --no-check-certificate wget https://sys-p0.bj.bcebos.com/bk-ci/bk.txt --no-check-certificate HASUTFIXED=`python ${PADDLE_ROOT}/tools/check_ut.py | grep "has unit-test to be fixed" || true` if [ "${HASUTFIXED}" != "" ]; then diff --git a/tools/is_ut_disabled.py b/tools/is_ut_disabled.py deleted file mode 100644 index a21fe39e71e..00000000000 --- a/tools/is_ut_disabled.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Check whether ut is disabled. """ - -import os -import sys - - -def check_ut(): - """ Get disabled unit tests. """ - disable_ut_file = 'disable_ut' - cmd = 'wget -q --no-check-certificate https://sys-p0.bj.bcebos.com/prec/{}'.format( - disable_ut_file) - os.system(cmd) - with open(disable_ut_file) as utfile: - for u in utfile: - if u.rstrip('\r\n') == sys.argv[1]: - exit(0) - exit(1) - - -if __name__ == '__main__': - if len(sys.argv) != 2: - exit(1) - try: - check_ut() - except Exception as e: - print(e) - exit(1) -- GitLab From a5b32637825e19f7527c09878ba2994314929d54 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Fri, 25 Sep 2020 20:51:21 +0800 Subject: [PATCH 102/117] Refine error msg in paddle/fluid/imperative (#27521) * refine err msg * follow comments --- .../fluid/imperative/gradient_accumulator.cc | 12 +++- .../imperative/jit/program_desc_tracer.cc | 13 ++-- paddle/fluid/imperative/nccl_context.cc | 59 +++++++++++++------ 3 files changed, 58 insertions(+), 26 deletions(-) diff --git a/paddle/fluid/imperative/gradient_accumulator.cc b/paddle/fluid/imperative/gradient_accumulator.cc index 7caeb4378ce..07f1868b7fa 100644 --- a/paddle/fluid/imperative/gradient_accumulator.cc +++ b/paddle/fluid/imperative/gradient_accumulator.cc @@ -13,9 +13,11 @@ // limitations under the License. 
#include "paddle/fluid/imperative/gradient_accumulator.h" + #include #include #include + #include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/selected_rows.h" @@ -136,9 +138,13 @@ void TensorAdd(const framework::Variable& src, framework::Variable* dst) { return; } - PADDLE_ENFORCE_EQ(dst_tensor->numel() == numel, true, - "dst_numel %d vs. src_numel %d", dst_tensor->numel(), - numel); + PADDLE_ENFORCE_EQ( + dst_tensor->numel(), numel, + platform::errors::PreconditionNotMet( + "The number of elements of source tensor and destination tensor " + "should be equal, but got the number of elements of source tensor is " + "%zu and the number of elements of destination tensor is %zu.", + numel, dst_tensor->numel())); auto data_type = src_tensor.type(); auto place = src_tensor.place(); diff --git a/paddle/fluid/imperative/jit/program_desc_tracer.cc b/paddle/fluid/imperative/jit/program_desc_tracer.cc index 9f4cf713f7c..59ff5b4eae4 100644 --- a/paddle/fluid/imperative/jit/program_desc_tracer.cc +++ b/paddle/fluid/imperative/jit/program_desc_tracer.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/imperative/jit/program_desc_tracer.h" + #include #include @@ -203,7 +204,8 @@ TracedProgramTuple ProgramDescTracer::CreateProgramDesc( void ProgramDescTracer::InsertVarIfNotExist( const std::shared_ptr &new_var, bool is_input) { - PADDLE_ENFORCE_NOT_NULL(new_var); + PADDLE_ENFORCE_NOT_NULL(new_var, platform::errors::InvalidArgument( + "The variable to insert is NULL.")); if (vars_.count(new_var) != 0) return; auto new_var_desc = new framework::VarDesc(""); @@ -220,7 +222,9 @@ void ProgramDescTracer::InsertVarIfNotExist( } const auto &inner_var = new_var->Var(); - PADDLE_ENFORCE_EQ(inner_var.IsInitialized(), true); + PADDLE_ENFORCE_EQ(inner_var.IsInitialized(), true, + platform::errors::InvalidArgument( + "The variable to insert is not initialized.")); if (inner_var.IsType()) { const auto &tensor = inner_var.Get(); new_var_desc->SetType(framework::proto::VarType::LOD_TENSOR); @@ -232,8 +236,9 @@ void ProgramDescTracer::InsertVarIfNotExist( new_var_desc->SetDataType(framework::proto::VarType::FP32); } } else { - PADDLE_THROW("Not support variable type %s", - framework::ToTypeName(inner_var.Type())); + PADDLE_THROW(platform::errors::InvalidArgument( + "Not support variable type %s.", + framework::ToTypeName(inner_var.Type()))); } } diff --git a/paddle/fluid/imperative/nccl_context.cc b/paddle/fluid/imperative/nccl_context.cc index 115078e7ead..c8fd31fcbff 100644 --- a/paddle/fluid/imperative/nccl_context.cc +++ b/paddle/fluid/imperative/nccl_context.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/imperative/nccl_context.h" + #include "paddle/fluid/platform/collective_helper.h" namespace paddle { @@ -21,8 +22,10 @@ namespace imperative { void NCCLParallelContext::RecvNCCLID(const std::string &ep, ncclUniqueId *nccl_id) { auto addr = paddle::string::Split(ep, ':'); - PADDLE_ENFORCE_EQ(addr.size(), 2UL, - "The endpoint should contain host and port: %s", ep); + PADDLE_ENFORCE_EQ( + addr.size(), 2UL, + platform::errors::InvalidArgument( + "The endpoint should contain host and port, but got %s.", ep)); std::string host = addr[0]; int port = std::stoi(addr[1]); @@ -32,27 +35,41 @@ void NCCLParallelContext::RecvNCCLID(const std::string &ep, char buffer[1024] = {0}; int opt = 0; // creating socket fd - if ((server_fd = socket(AF_INET, SOCK_STREAM, 0)) == 0) - PADDLE_THROW("create server fd failed"); - if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) - PADDLE_THROW("set socket opt failed"); + if ((server_fd = socket(AF_INET, SOCK_STREAM, 0)) == 0) { + PADDLE_THROW( + platform::errors::Unavailable("Create server file descriptor failed.")); + } + + if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) { + PADDLE_THROW(platform::errors::Unavailable("Set socket options failed.")); + } address.sin_family = AF_INET; address.sin_addr.s_addr = INADDR_ANY; address.sin_port = htons(port); - if (bind(server_fd, (struct sockaddr *)&address, sizeof(address)) < 0) - PADDLE_THROW("binding failed on ep: %s", ep); + if (bind(server_fd, (struct sockaddr *)&address, sizeof(address)) < 0) { + PADDLE_THROW( + platform::errors::Unavailable("Bind on endpoint %s failed.", ep)); + } + VLOG(3) << "listening on: " << ep; - if (listen(server_fd, 3) < 0) PADDLE_THROW("listen on server fd failed"); + if (listen(server_fd, 3) < 0) { + PADDLE_THROW(platform::errors::Unavailable( + "Listen on server file descriptor failed.")); + } if ((new_socket = accept(server_fd, reinterpret_cast(&address), - reinterpret_cast(&addrlen))) < 0) - 
PADDLE_THROW("accept the new socket fd failed"); + reinterpret_cast(&addrlen))) < 0) { + PADDLE_THROW(platform::errors::Unavailable( + "Accept the new socket file descriptor failed.")); + } + + if (read(new_socket, buffer, 1024) < 0) { + PADDLE_THROW(platform::errors::Unavailable("Read from socket failed.")); + } - if (read(new_socket, buffer, 1024) < 0) - PADDLE_THROW("reading the ncclUniqueId from socket failed"); VLOG(3) << "recevived the ncclUniqueId"; memcpy(nccl_id, buffer, NCCL_UNIQUE_ID_BYTES); @@ -63,8 +80,10 @@ void NCCLParallelContext::RecvNCCLID(const std::string &ep, void NCCLParallelContext::SendNCCLID(const std::string &ep, ncclUniqueId *nccl_id) { auto addr = paddle::string::Split(ep, ':'); - PADDLE_ENFORCE_EQ(addr.size(), 2UL, - "The endpoint should contain host and port: %s", ep); + PADDLE_ENFORCE_EQ( + addr.size(), 2UL, + platform::errors::InvalidArgument( + "The endpoint should contain host and port, but got %s.", ep)); std::string host = addr[0]; int port = std::stoi(addr[1]); // struct sockaddr_in address; @@ -73,15 +92,17 @@ void NCCLParallelContext::SendNCCLID(const std::string &ep, char buffer[1024] = {0}; memcpy(buffer, nccl_id, NCCL_UNIQUE_ID_BYTES); - if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) - PADDLE_THROW("create socket failed"); + if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) { + PADDLE_THROW(platform::errors::Unavailable("Create socket failed.")); + } memset(&serv_addr, '0', sizeof(serv_addr)); serv_addr.sin_family = AF_INET; serv_addr.sin_port = htons(port); - if (inet_pton(AF_INET, host.c_str(), &serv_addr.sin_addr) <= 0) - PADDLE_THROW("invalied address: %s", ep); + if (inet_pton(AF_INET, host.c_str(), &serv_addr.sin_addr) <= 0) { + PADDLE_THROW(platform::errors::Unavailable("Open address %s failed.", ep)); + } int try_times = 0; while (true) { -- GitLab From 0b4bb023a7ef93669e9007f7e6241f24c6e98cb6 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Fri, 25 Sep 2020 21:35:40 +0800 Subject: [PATCH 103/117] Add static mode 
check on data() (#27495) * add static check on data() * follow comments * fix ut --- python/paddle/fluid/data.py | 2 ++ python/paddle/fluid/framework.py | 12 +++++++++++- python/paddle/fluid/layers/io.py | 2 ++ python/paddle/fluid/tests/unittests/test_data.py | 12 ++++++++++++ .../tests/unittests/test_deprecated_decorator.py | 2 ++ python/paddle/static/input.py | 2 ++ 6 files changed, 31 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/data.py b/python/paddle/fluid/data.py index dc57e9f71ed..05ea66f5445 100644 --- a/python/paddle/fluid/data.py +++ b/python/paddle/fluid/data.py @@ -19,10 +19,12 @@ from paddle.fluid import core from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.data_feeder import check_dtype, check_type from ..utils import deprecated +from paddle.fluid.framework import static_only __all__ = ['data'] +@static_only @deprecated(since="2.0.0", update_to="paddle.static.data") def data(name, shape, dtype='float32', lod_level=0): """ diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 797b32f5d47..c7e66bb2877 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -217,7 +217,16 @@ def _dygraph_not_support_(func): def _dygraph_only_(func): def __impl__(*args, **kwargs): assert in_dygraph_mode( - ), "We Only support %s in dynamic mode, please call 'paddle.disable_static()' to enter dynamic mode." % func.__name__ + ), "We only support '%s()' in dynamic graph mode, please call 'paddle.disable_static()' to enter dynamic graph mode." % func.__name__ + return func(*args, **kwargs) + + return __impl__ + + +def _static_only_(func): + def __impl__(*args, **kwargs): + assert not in_dygraph_mode( + ), "We only support '%s()' in static graph mode, please call 'paddle.enable_static()' to enter static graph mode." 
% func.__name__ return func(*args, **kwargs) return __impl__ @@ -260,6 +269,7 @@ def deprecate_stat_dict(func): dygraph_not_support = wrap_decorator(_dygraph_not_support_) dygraph_only = wrap_decorator(_dygraph_only_) +static_only = wrap_decorator(_static_only_) fake_interface_only = wrap_decorator(_fake_interface_only_) diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py index d513d44acff..6b98dea4290 100644 --- a/python/paddle/fluid/layers/io.py +++ b/python/paddle/fluid/layers/io.py @@ -31,6 +31,7 @@ from ..unique_name import generate as unique_name import logging from ..data_feeder import check_dtype, check_type +from paddle.fluid.framework import static_only __all__ = [ 'data', 'read_file', 'double_buffer', 'py_reader', @@ -38,6 +39,7 @@ __all__ = [ ] +@static_only def data(name, shape, append_batch_size=True, diff --git a/python/paddle/fluid/tests/unittests/test_data.py b/python/paddle/fluid/tests/unittests/test_data.py index 8070148f8b3..98739f6e163 100644 --- a/python/paddle/fluid/tests/unittests/test_data.py +++ b/python/paddle/fluid/tests/unittests/test_data.py @@ -99,5 +99,17 @@ class TestApiStaticDataError(unittest.TestCase): self.assertRaises(TypeError, test_shape_type) +class TestApiErrorWithDynamicMode(unittest.TestCase): + def test_error(self): + with program_guard(Program(), Program()): + paddle.disable_static() + self.assertRaises(AssertionError, fluid.data, 'a', [2, 25]) + self.assertRaises( + AssertionError, fluid.layers.data, 'b', shape=[2, 25]) + self.assertRaises( + AssertionError, paddle.static.data, 'c', shape=[2, 25]) + paddle.enable_static() + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_deprecated_decorator.py b/python/paddle/fluid/tests/unittests/test_deprecated_decorator.py index 2a80e20d692..97b6594eb38 100755 --- a/python/paddle/fluid/tests/unittests/test_deprecated_decorator.py +++ b/python/paddle/fluid/tests/unittests/test_deprecated_decorator.py @@ 
-72,6 +72,7 @@ class TestDeprecatedDocorator(unittest.TestCase): test old fluid elementwise_mul api, it should fire Warinng function, which insert the Warinng info on top of API's doc string. """ + paddle.enable_static() # Initialization x = fluid.data(name='x', shape=[3, 2, 1], dtype='float32') @@ -80,6 +81,7 @@ class TestDeprecatedDocorator(unittest.TestCase): # captured captured = get_warning_index(fluid.data) + paddle.disable_static() # testting self.assertGreater(expected, captured) diff --git a/python/paddle/static/input.py b/python/paddle/static/input.py index eb70320ea75..d7a3cfcdb92 100644 --- a/python/paddle/static/input.py +++ b/python/paddle/static/input.py @@ -19,10 +19,12 @@ from paddle.fluid import core, Variable from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.data_feeder import check_type from paddle.fluid.framework import convert_np_dtype_to_dtype_ +from paddle.fluid.framework import static_only __all__ = ['data', 'InputSpec'] +@static_only def data(name, shape, dtype=None, lod_level=0): """ **Data Layer** -- GitLab From b38e4f2840ebc4ee0195ab8de789bf5b8d54ef37 Mon Sep 17 00:00:00 2001 From: LielinJiang <50691816+LielinJiang@users.noreply.github.com> Date: Fri, 25 Sep 2020 22:08:40 +0800 Subject: [PATCH 104/117] Refine vision models (#27476) * refine vision models --- python/paddle/hapi/callbacks.py | 10 +- python/paddle/hapi/model.py | 60 +-- python/paddle/metric/metrics.py | 9 +- python/paddle/tests/CMakeLists.txt | 4 - .../paddle/tests/dist_hapi_mnist_dynamic.py | 2 +- python/paddle/tests/dist_hapi_mnist_static.py | 2 +- python/paddle/tests/test_model.py | 57 ++- python/paddle/tests/test_pretrained_model.py | 45 ++- python/paddle/tests/test_vision_models.py | 2 +- python/paddle/vision/models/lenet.py | 30 +- python/paddle/vision/models/mobilenetv1.py | 189 ++++------ python/paddle/vision/models/mobilenetv2.py | 347 ++++++++---------- python/paddle/vision/models/resnet.py | 337 ++++++++--------- 
python/paddle/vision/models/vgg.py | 73 ++-- 14 files changed, 539 insertions(+), 628 deletions(-) diff --git a/python/paddle/hapi/callbacks.py b/python/paddle/hapi/callbacks.py index 7ed571fa9c6..69b7fedd72e 100644 --- a/python/paddle/hapi/callbacks.py +++ b/python/paddle/hapi/callbacks.py @@ -301,10 +301,11 @@ class ProgBarLogger(Callback): train_dataset = paddle.vision.datasets.MNIST(mode='train') - model = paddle.Model(paddle.vision.LeNet(classifier_activation=None), + lenet = paddle.vision.LeNet() + model = paddle.Model(lenet, inputs, labels) - optim = paddle.optimizer.Adam(0.001) + optim = paddle.optimizer.Adam(0.001, parameters=lenet.parameters()) model.prepare(optimizer=optim, loss=paddle.nn.CrossEntropyLoss(), metrics=paddle.metric.Accuracy()) @@ -436,10 +437,11 @@ class ModelCheckpoint(Callback): train_dataset = paddle.vision.datasets.MNIST(mode='train') - model = paddle.Model(paddle.vision.LeNet(classifier_activation=None), + lenet = paddle.vision.LeNet() + model = paddle.Model(lenet, inputs, labels) - optim = paddle.optimizer.Adam(0.001) + optim = paddle.optimizer.Adam(0.001, parameters=lenet.parameters()) model.prepare(optimizer=optim, loss=paddle.nn.CrossEntropyLoss(), metrics=paddle.metric.Accuracy()) diff --git a/python/paddle/hapi/model.py b/python/paddle/hapi/model.py index 53928ebed1b..1bfe8f07a2f 100644 --- a/python/paddle/hapi/model.py +++ b/python/paddle/hapi/model.py @@ -814,10 +814,9 @@ class Model(object): from paddle.static import InputSpec device = paddle.set_device('cpu') # or 'gpu' - # if use static graph, do not set - paddle.disable_static(device) net = nn.Sequential( + nn.Flatten(1), nn.Linear(784, 200), nn.Tanh(), nn.Linear(200, 10)) @@ -833,7 +832,7 @@ class Model(object): paddle.nn.CrossEntropyLoss(), paddle.metric.Accuracy()) - data = paddle.vision.datasets.MNIST(mode='train', chw_format=False) + data = paddle.vision.datasets.MNIST(mode='train') model.fit(data, epochs=2, batch_size=32, verbose=1) """ @@ -850,7 +849,8 @@ class 
Model(object): if not isinstance(inputs, (list, dict, Input)): raise TypeError( - "'inputs' must be list or dict, and couldn't be None.") + "'inputs' must be list or dict in static graph mode") + self._inputs = self._verify_spec(inputs, True) self._labels = self._verify_spec(labels) @@ -885,7 +885,6 @@ class Model(object): from paddle.static import InputSpec device = paddle.set_device('cpu') # or 'gpu' - paddle.disable_static(device) net = nn.Sequential( nn.Linear(784, 200), @@ -930,7 +929,6 @@ class Model(object): from paddle.static import InputSpec device = paddle.set_device('cpu') # or 'gpu' - paddle.disable_static(device) net = nn.Sequential( nn.Linear(784, 200), @@ -970,9 +968,12 @@ class Model(object): import numpy as np import paddle import paddle.nn as nn + from paddle.static import InputSpec device = paddle.set_device('cpu') # or 'gpu' - paddle.disable_static(device) + + input = InputSpec([None, 784], 'float32', 'x') + label = InputSpec([None, 1], 'int64', 'label') net = nn.Sequential( nn.Linear(784, 200), @@ -980,7 +981,7 @@ class Model(object): nn.Linear(200, 10), nn.Softmax()) - model = paddle.Model(net) + model = paddle.Model(net, input, label) model.prepare() data = np.random.random(size=(4,784)).astype(np.float32) out = model.test_batch([data]) @@ -1026,6 +1027,7 @@ class Model(object): def __init__(self): super(Mnist, self).__init__() self.net = nn.Sequential( + nn.Flatten(1), nn.Linear(784, 200), nn.Tanh(), nn.Linear(200, 10), @@ -1045,7 +1047,7 @@ class Model(object): optim = paddle.optimizer.SGD(learning_rate=1e-3, parameters=model.parameters()) model.prepare(optim, paddle.nn.CrossEntropyLoss()) - data = paddle.vision.datasets.MNIST(mode='train', chw_format=False) + data = paddle.vision.datasets.MNIST(mode='train') model.fit(data, epochs=1, batch_size=32, verbose=0) model.save('checkpoint/test') # save for training model.save('inference_model', False) # save for inference @@ -1092,15 +1094,18 @@ class Model(object): import paddle import paddle.nn 
as nn - + from paddle.static import InputSpec + device = paddle.set_device('cpu') - paddle.disable_static(device) + + input = InputSpec([None, 784], 'float32', 'x') model = paddle.Model(nn.Sequential( nn.Linear(784, 200), nn.Tanh(), nn.Linear(200, 10), - nn.Softmax())) + nn.Softmax()), input) + model.save('checkpoint/test') model.load('checkpoint/test') """ @@ -1165,13 +1170,15 @@ class Model(object): import paddle import paddle.nn as nn + from paddle.static import InputSpec - paddle.disable_static() - + input = InputSpec([None, 784], 'float32', 'x') + model = paddle.Model(nn.Sequential( nn.Linear(784, 200), nn.Tanh(), - nn.Linear(200, 10))) + nn.Linear(200, 10)), input) + params = model.parameters() """ return self._adapter.parameters() @@ -1313,7 +1320,7 @@ class Model(object): label = InputSpec([None, 1], 'int64', 'label') model = paddle.Model( - paddle.vision.models.LeNet(classifier_activation=None), + paddle.vision.models.LeNet(), input, label) optim = paddle.optimizer.Adam( learning_rate=0.001, parameters=model.parameters()) @@ -1350,7 +1357,7 @@ class Model(object): label = InputSpec([None, 1], 'int64', 'label') model = paddle.Model( - paddle.vision.models.LeNet(classifier_activation=None), input, label) + paddle.vision.models.LeNet(), input, label) optim = paddle.optimizer.Adam( learning_rate=0.001, parameters=model.parameters()) model.prepare( @@ -1483,7 +1490,7 @@ class Model(object): # imperative mode paddle.disable_static() - model = paddle.Model(paddle.vision.models.LeNet()) + model = paddle.Model(paddle.vision.models.LeNet(), input, label) model.prepare(metrics=paddle.metric.Accuracy()) result = model.evaluate(val_dataset, batch_size=64) print(result) @@ -1580,19 +1587,20 @@ class Model(object): test_dataset = MnistDataset(mode='test', return_label=False) - # declarative mode + # imperative mode input = InputSpec([-1, 1, 28, 28], 'float32', 'image') model = paddle.Model(paddle.vision.models.LeNet(), input) model.prepare() - result = 
model.predict(test_dataset, batch_size=64) print(len(result[0]), result[0][0].shape) - # imperative mode + # declarative mode device = paddle.set_device('cpu') - paddle.disable_static(device) - model = paddle.Model(paddle.vision.models.LeNet()) + paddle.enable_static() + input = InputSpec([-1, 1, 28, 28], 'float32', 'image') + model = paddle.Model(paddle.vision.models.LeNet(), input) model.prepare() + result = model.predict(test_dataset, batch_size=64) print(len(result[0]), result[0][0].shape) """ @@ -1832,15 +1840,11 @@ class Model(object): import paddle from paddle.static import InputSpec - - dynamic = True - device = paddle.set_device('cpu') - paddle.disable_static(device) if dynamic else None input = InputSpec([None, 1, 28, 28], 'float32', 'image') label = InputSpec([None, 1], 'int64', 'label') - model = paddle.Model(paddle.vision.LeNet(classifier_activation=None), + model = paddle.Model(paddle.vision.LeNet(), input, label) optim = paddle.optimizer.Adam( learning_rate=0.001, parameters=model.parameters()) diff --git a/python/paddle/metric/metrics.py b/python/paddle/metric/metrics.py index 1cd65171ff0..f4a9b8c01d0 100644 --- a/python/paddle/metric/metrics.py +++ b/python/paddle/metric/metrics.py @@ -182,7 +182,6 @@ class Accuracy(Metric): import numpy as np import paddle - paddle.disable_static() x = paddle.to_tensor(np.array([ [0.1, 0.2, 0.3, 0.4], [0.1, 0.4, 0.3, 0.2], @@ -202,11 +201,13 @@ class Accuracy(Metric): .. 
code-block:: python import paddle - - paddle.disable_static() + from paddle.static import InputSpec + + input = InputSpec([None, 1, 28, 28], 'float32', 'image') + label = InputSpec([None, 1], 'int64', 'label') train_dataset = paddle.vision.datasets.MNIST(mode='train') - model = paddle.Model(paddle.vision.LeNet(classifier_activation=None)) + model = paddle.Model(paddle.vision.LeNet(), input, label) optim = paddle.optimizer.Adam( learning_rate=0.001, parameters=model.parameters()) model.prepare( diff --git a/python/paddle/tests/CMakeLists.txt b/python/paddle/tests/CMakeLists.txt index 6fb73b08c11..e1bc65a5d15 100644 --- a/python/paddle/tests/CMakeLists.txt +++ b/python/paddle/tests/CMakeLists.txt @@ -8,10 +8,6 @@ foreach(TEST_OP ${DIST_TEST_OPS}) list(REMOVE_ITEM TEST_OPS ${TEST_OP}) endforeach() -# disable test_pretrained_model and test_vision_models -list(REMOVE_ITEM TEST_OPS test_pretrained_model) -list(REMOVE_ITEM TEST_OPS test_vision_models) - foreach(src ${TEST_OPS}) py_test(${src} SRCS ${src}.py) endforeach() diff --git a/python/paddle/tests/dist_hapi_mnist_dynamic.py b/python/paddle/tests/dist_hapi_mnist_dynamic.py index 13d966bf38f..46d02789402 100644 --- a/python/paddle/tests/dist_hapi_mnist_dynamic.py +++ b/python/paddle/tests/dist_hapi_mnist_dynamic.py @@ -68,7 +68,7 @@ class TestDistTraning(unittest.TestCase): inputs = [Input(im_shape, 'float32', 'image')] labels = [Input([None, 1], 'int64', 'label')] - model = Model(LeNet(classifier_activation=None), inputs, labels) + model = Model(LeNet(), inputs, labels) optim = fluid.optimizer.Momentum( learning_rate=0.001, momentum=.9, parameter_list=model.parameters()) model.prepare(optim, CrossEntropyLoss(), Accuracy()) diff --git a/python/paddle/tests/dist_hapi_mnist_static.py b/python/paddle/tests/dist_hapi_mnist_static.py index 9d8e5f3652c..eab34a6dafb 100644 --- a/python/paddle/tests/dist_hapi_mnist_static.py +++ b/python/paddle/tests/dist_hapi_mnist_static.py @@ -67,7 +67,7 @@ class 
TestDistTraning(unittest.TestCase): inputs = [Input(im_shape, 'float32', 'image')] labels = [Input([None, 1], 'int64', 'label')] - model = Model(LeNet(classifier_activation=None), inputs, labels) + model = Model(LeNet(), inputs, labels) optim = fluid.optimizer.Momentum( learning_rate=0.001, momentum=.9, parameter_list=model.parameters()) model.prepare(optim, CrossEntropyLoss(), Accuracy()) diff --git a/python/paddle/tests/test_model.py b/python/paddle/tests/test_model.py index c89cbbbfbda..5a3d837407b 100644 --- a/python/paddle/tests/test_model.py +++ b/python/paddle/tests/test_model.py @@ -40,7 +40,7 @@ from paddle.fluid.dygraph.dygraph_to_static.program_translator import ProgramTra class LeNetDygraph(paddle.nn.Layer): - def __init__(self, num_classes=10, classifier_activation=None): + def __init__(self, num_classes=10): super(LeNetDygraph, self).__init__() self.num_classes = num_classes self.features = Sequential( @@ -55,8 +55,7 @@ class LeNetDygraph(paddle.nn.Layer): if num_classes > 0: self.fc = Sequential( - Linear(400, 120), Linear(120, 84), Linear(84, 10), - Softmax()) #Todo: accept any activation + Linear(400, 120), Linear(120, 84), Linear(84, 10)) def forward(self, inputs): x = self.features(inputs) @@ -67,6 +66,34 @@ class LeNetDygraph(paddle.nn.Layer): return x +class LeNetDeclarative(fluid.dygraph.Layer): + def __init__(self, num_classes=10): + super(LeNetDeclarative, self).__init__() + self.num_classes = num_classes + self.features = Sequential( + Conv2d( + 1, 6, 3, stride=1, padding=1), + ReLU(), + Pool2D(2, 'max', 2), + Conv2d( + 6, 16, 5, stride=1, padding=0), + ReLU(), + Pool2D(2, 'max', 2)) + + if num_classes > 0: + self.fc = Sequential( + Linear(400, 120), Linear(120, 84), Linear(84, 10)) + + @declarative + def forward(self, inputs): + x = self.features(inputs) + + if self.num_classes > 0: + x = fluid.layers.flatten(x, 1) + x = self.fc(x) + return x + + class MnistDataset(MNIST): def __init__(self, mode, return_label=True, sample_num=None): 
super(MnistDataset, self).__init__(mode=mode) @@ -198,7 +225,7 @@ class TestModel(unittest.TestCase): paddle.manual_seed(seed) paddle.framework.random._manual_program_seed(seed) - net = LeNet(classifier_activation=None) + net = LeNet() optim_new = fluid.optimizer.Adam( learning_rate=0.001, parameter_list=net.parameters()) model = Model(net, inputs=self.inputs, labels=self.labels) @@ -287,14 +314,12 @@ class TestModel(unittest.TestCase): class MyModel(paddle.nn.Layer): - def __init__(self, classifier_activation='softmax'): + def __init__(self): super(MyModel, self).__init__() self._fc = Linear(20, 10) - self._act = Softmax() #Todo: accept any activation def forward(self, x): y = self._fc(x) - y = self._act(y) return y @@ -311,7 +336,7 @@ class TestModelFunction(unittest.TestCase): def get_expect(): fluid.enable_dygraph(fluid.CPUPlace()) self.set_seed() - m = MyModel(classifier_activation=None) + m = MyModel() optim = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=m.parameters()) m.train() @@ -330,7 +355,7 @@ class TestModelFunction(unittest.TestCase): fluid.enable_dygraph(device) if dynamic else None self.set_seed() - net = MyModel(classifier_activation=None) + net = MyModel() optim2 = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=net.parameters()) @@ -374,7 +399,7 @@ class TestModelFunction(unittest.TestCase): for dynamic in [True, False]: device = paddle.set_device('cpu') fluid.enable_dygraph(device) if dynamic else None - net = MyModel(classifier_activation=None) + net = MyModel() inputs = [InputSpec([None, 20], 'float32', 'x')] labels = [InputSpec([None, 1], 'int64', 'label')] optim = fluid.optimizer.SGD(learning_rate=0.001, @@ -417,7 +442,7 @@ class TestModelFunction(unittest.TestCase): fluid.enable_dygraph(device) inputs = [InputSpec([None, 20], 'float32', 'x')] labels = [InputSpec([None, 1], 'int64', 'label')] - model = Model(MyModel(classifier_activation=None), inputs, labels) + model = Model(MyModel(), inputs, labels) optim = 
fluid.optimizer.SGD(learning_rate=0.001, parameter_list=model.parameters()) model.prepare(optimizer=optim, loss=CrossEntropyLoss(reduction="sum")) @@ -426,7 +451,7 @@ class TestModelFunction(unittest.TestCase): inputs = [InputSpec([None, 20], 'float32', 'x')] labels = [InputSpec([None, 1], 'int64', 'label')] - model = Model(MyModel(classifier_activation=None), inputs, labels) + model = Model(MyModel(), inputs, labels) optim = fluid.optimizer.SGD(learning_rate=0.001, parameter_list=model.parameters()) model.prepare(optimizer=optim, loss=CrossEntropyLoss(reduction="sum")) @@ -436,7 +461,7 @@ class TestModelFunction(unittest.TestCase): def test_static_save_dynamic_load(self): path = tempfile.mkdtemp() - net = MyModel(classifier_activation=None) + net = MyModel() inputs = [InputSpec([None, 20], 'float32', 'x')] labels = [InputSpec([None, 1], 'int64', 'label')] optim = fluid.optimizer.SGD(learning_rate=0.001, @@ -448,7 +473,7 @@ class TestModelFunction(unittest.TestCase): device = paddle.set_device('cpu') fluid.enable_dygraph(device) #if dynamic else None - net = MyModel(classifier_activation=None) + net = MyModel() inputs = [InputSpec([None, 20], 'float32', 'x')] labels = [InputSpec([None, 1], 'int64', 'label')] optim = fluid.optimizer.SGD(learning_rate=0.001, @@ -557,7 +582,7 @@ class TestModelFunction(unittest.TestCase): class TestRaiseError(unittest.TestCase): def test_input_without_name(self): - net = MyModel(classifier_activation=None) + net = MyModel() inputs = [InputSpec([None, 10], 'float32')] labels = [InputSpec([None, 1], 'int64', 'label')] @@ -567,7 +592,7 @@ class TestRaiseError(unittest.TestCase): def test_input_without_input_spec(self): for dynamic in [True, False]: paddle.disable_static() if dynamic else None - net = MyModel(classifier_activation=None) + net = MyModel() with self.assertRaises(TypeError): model = Model(net) paddle.enable_static() diff --git a/python/paddle/tests/test_pretrained_model.py b/python/paddle/tests/test_pretrained_model.py index 
641147d39e9..bf9c2a2ae06 100644 --- a/python/paddle/tests/test_pretrained_model.py +++ b/python/paddle/tests/test_pretrained_model.py @@ -13,6 +13,8 @@ # limitations under the License. import unittest +import tempfile +import shutil import numpy as np import paddle @@ -23,27 +25,36 @@ import paddle.vision.models as models # test the predicted resutls of static graph and dynamic graph are equal # when used pretrained model class TestPretrainedModel(unittest.TestCase): - def infer(self, x, arch, dygraph=True): - if dygraph: - paddle.disable_static() - - net = models.__dict__[arch](pretrained=True, classifier_activation=None) - inputs = [InputSpec([None, 3, 224, 224], 'float32', 'image')] - model = paddle.Model(network=net, inputs=inputs) - model.prepare() - res = model.test_batch(x) - - if dygraph: - paddle.enable_static() - return res + def infer(self, arch): + path = tempfile.mkdtemp() + x = np.array(np.random.random((2, 3, 224, 224)), dtype=np.float32) + res = {} + for dygraph in [True, False]: + if not dygraph: + paddle.enable_static() + + net = models.__dict__[arch]() + inputs = [InputSpec([None, 3, 224, 224], 'float32', 'image')] + model = paddle.Model(network=net, inputs=inputs) + model.prepare() + + if dygraph: + model.save(path) + res['dygraph'] = model.test_batch(x) + else: + model.load(path) + res['static'] = model.test_batch(x) + + if not dygraph: + paddle.disable_static() + + shutil.rmtree(path) + np.testing.assert_allclose(res['dygraph'], res['static']) def test_models(self): arches = ['mobilenet_v1', 'mobilenet_v2', 'resnet18'] for arch in arches: - x = np.array(np.random.random((2, 3, 224, 224)), dtype=np.float32) - y_dygraph = self.infer(x, arch) - y_static = self.infer(x, arch, dygraph=False) - np.testing.assert_allclose(y_dygraph, y_static) + self.infer(arch) if __name__ == '__main__': diff --git a/python/paddle/tests/test_vision_models.py b/python/paddle/tests/test_vision_models.py index 44f9ab53901..6489b02615b 100644 --- 
a/python/paddle/tests/test_vision_models.py +++ b/python/paddle/tests/test_vision_models.py @@ -36,7 +36,7 @@ class TestVisonModels(unittest.TestCase): model.test_batch(x) def test_mobilenetv2_pretrained(self): - self.models_infer('mobilenet_v2', pretrained=True) + self.models_infer('mobilenet_v2', pretrained=False) def test_mobilenetv1(self): self.models_infer('mobilenet_v1') diff --git a/python/paddle/vision/models/lenet.py b/python/paddle/vision/models/lenet.py index c2d4be7cda1..b30d5992f9a 100644 --- a/python/paddle/vision/models/lenet.py +++ b/python/paddle/vision/models/lenet.py @@ -12,20 +12,19 @@ #See the License for the specific language governing permissions and #limitations under the License. -import paddle.fluid as fluid -from paddle.nn import Conv2d, Pool2D, Linear, ReLU, Sequential, Softmax +import paddle +import paddle.nn as nn __all__ = ['LeNet'] -class LeNet(fluid.dygraph.Layer): +class LeNet(nn.Layer): """LeNet model from `"LeCun Y, Bottou L, Bengio Y, et al. Gradient-based learning applied to document recognition[J]. Proceedings of the IEEE, 1998, 86(11): 2278-2324.`_ Args: num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer will not be defined. Default: 10. - classifier_activation (str): activation for the last fc layer. Default: 'softmax'. Examples: .. 
code-block:: python @@ -35,28 +34,27 @@ class LeNet(fluid.dygraph.Layer): model = LeNet() """ - def __init__(self, num_classes=10, classifier_activation='softmax'): + def __init__(self, num_classes=10): super(LeNet, self).__init__() self.num_classes = num_classes - self.features = Sequential( - Conv2d( + self.features = nn.Sequential( + nn.Conv2d( 1, 6, 3, stride=1, padding=1), - ReLU(), - Pool2D(2, 'max', 2), - Conv2d( + nn.ReLU(), + nn.MaxPool2d(2, 2), + nn.Conv2d( 6, 16, 5, stride=1, padding=0), - ReLU(), - Pool2D(2, 'max', 2)) + nn.ReLU(), + nn.MaxPool2d(2, 2)) if num_classes > 0: - self.fc = Sequential( - Linear(400, 120), Linear(120, 84), Linear(84, 10), - Softmax()) #Todo: accept any activation + self.fc = nn.Sequential( + nn.Linear(400, 120), nn.Linear(120, 84), nn.Linear(84, 10)) def forward(self, inputs): x = self.features(inputs) if self.num_classes > 0: - x = fluid.layers.flatten(x, 1) + x = paddle.flatten(x, 1) x = self.fc(x) return x diff --git a/python/paddle/vision/models/mobilenetv1.py b/python/paddle/vision/models/mobilenetv1.py index 10defbf593d..39654122e3b 100644 --- a/python/paddle/vision/models/mobilenetv1.py +++ b/python/paddle/vision/models/mobilenetv1.py @@ -12,10 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import paddle.fluid as fluid -from paddle.fluid.initializer import MSRA -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear +import paddle +import paddle.nn as nn from paddle.utils.download import get_weights_path_from_url @@ -24,85 +22,66 @@ __all__ = ['MobileNetV1', 'mobilenet_v1'] model_urls = { 'mobilenetv1_1.0': ('https://paddle-hapi.bj.bcebos.com/models/mobilenet_v1_x1.0.pdparams', - 'bf0d25cb0bed1114d9dac9384ce2b4a6') + '42a154c2f26f86e7457d6daded114e8c') } -class ConvBNLayer(fluid.dygraph.Layer): +class ConvBNLayer(nn.Layer): def __init__(self, - num_channels, - filter_size, - num_filters, + in_channels, + out_channels, + kernel_size, stride, padding, - channels=None, - num_groups=1, - act='relu', - use_cudnn=True, - name=None): + num_groups=1): super(ConvBNLayer, self).__init__() - self._conv = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, + self._conv = nn.Conv2d( + in_channels, + out_channels, + kernel_size, stride=stride, padding=padding, groups=num_groups, - act=None, - use_cudnn=use_cudnn, - param_attr=ParamAttr( - initializer=MSRA(), name=self.full_name() + "_weights"), bias_attr=False) - self._batch_norm = BatchNorm( - num_filters, - act=act, - param_attr=ParamAttr(name=self.full_name() + "_bn" + "_scale"), - bias_attr=ParamAttr(name=self.full_name() + "_bn" + "_offset"), - moving_mean_name=self.full_name() + "_bn" + '_mean', - moving_variance_name=self.full_name() + "_bn" + '_variance') + self._norm_layer = nn.BatchNorm2d(out_channels) + self._act = nn.ReLU() - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - return y + def forward(self, x): + x = self._conv(x) + x = self._norm_layer(x) + x = self._act(x) + return x -class DepthwiseSeparable(fluid.dygraph.Layer): - def __init__(self, - num_channels, - num_filters1, - num_filters2, - num_groups, - stride, - scale, - name=None): +class 
DepthwiseSeparable(nn.Layer): + def __init__(self, in_channels, out_channels1, out_channels2, num_groups, + stride, scale): super(DepthwiseSeparable, self).__init__() self._depthwise_conv = ConvBNLayer( - num_channels=num_channels, - num_filters=int(num_filters1 * scale), - filter_size=3, + in_channels, + int(out_channels1 * scale), + kernel_size=3, stride=stride, padding=1, - num_groups=int(num_groups * scale), - use_cudnn=False) + num_groups=int(num_groups * scale)) self._pointwise_conv = ConvBNLayer( - num_channels=int(num_filters1 * scale), - filter_size=1, - num_filters=int(num_filters2 * scale), + int(out_channels1 * scale), + int(out_channels2 * scale), + kernel_size=1, stride=1, padding=0) - def forward(self, inputs): - y = self._depthwise_conv(inputs) - y = self._pointwise_conv(y) - return y + def forward(self, x): + x = self._depthwise_conv(x) + x = self._pointwise_conv(x) + return x -class MobileNetV1(fluid.dygraph.Layer): +class MobileNetV1(nn.Layer): """MobileNetV1 model from `"MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" `_. @@ -111,7 +90,6 @@ class MobileNetV1(fluid.dygraph.Layer): num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer will not be defined. Default: 1000. with_pool (bool): use pool before the last fc layer or not. Default: True. - classifier_activation (str): activation for the last fc layer. Default: 'softmax'. Examples: .. 
code-block:: python @@ -121,11 +99,7 @@ class MobileNetV1(fluid.dygraph.Layer): model = MobileNetV1() """ - def __init__(self, - scale=1.0, - num_classes=1000, - with_pool=True, - classifier_activation='softmax'): + def __init__(self, scale=1.0, num_classes=1000, with_pool=True): super(MobileNetV1, self).__init__() self.scale = scale self.dwsl = [] @@ -133,18 +107,17 @@ class MobileNetV1(fluid.dygraph.Layer): self.with_pool = with_pool self.conv1 = ConvBNLayer( - num_channels=3, - filter_size=3, - channels=3, - num_filters=int(32 * scale), + in_channels=3, + out_channels=int(32 * scale), + kernel_size=3, stride=2, padding=1) dws21 = self.add_sublayer( sublayer=DepthwiseSeparable( - num_channels=int(32 * scale), - num_filters1=32, - num_filters2=64, + in_channels=int(32 * scale), + out_channels1=32, + out_channels2=64, num_groups=32, stride=1, scale=scale), @@ -153,9 +126,9 @@ class MobileNetV1(fluid.dygraph.Layer): dws22 = self.add_sublayer( sublayer=DepthwiseSeparable( - num_channels=int(64 * scale), - num_filters1=64, - num_filters2=128, + in_channels=int(64 * scale), + out_channels1=64, + out_channels2=128, num_groups=64, stride=2, scale=scale), @@ -164,9 +137,9 @@ class MobileNetV1(fluid.dygraph.Layer): dws31 = self.add_sublayer( sublayer=DepthwiseSeparable( - num_channels=int(128 * scale), - num_filters1=128, - num_filters2=128, + in_channels=int(128 * scale), + out_channels1=128, + out_channels2=128, num_groups=128, stride=1, scale=scale), @@ -175,9 +148,9 @@ class MobileNetV1(fluid.dygraph.Layer): dws32 = self.add_sublayer( sublayer=DepthwiseSeparable( - num_channels=int(128 * scale), - num_filters1=128, - num_filters2=256, + in_channels=int(128 * scale), + out_channels1=128, + out_channels2=256, num_groups=128, stride=2, scale=scale), @@ -186,9 +159,9 @@ class MobileNetV1(fluid.dygraph.Layer): dws41 = self.add_sublayer( sublayer=DepthwiseSeparable( - num_channels=int(256 * scale), - num_filters1=256, - num_filters2=256, + in_channels=int(256 * scale), + 
out_channels1=256, + out_channels2=256, num_groups=256, stride=1, scale=scale), @@ -197,9 +170,9 @@ class MobileNetV1(fluid.dygraph.Layer): dws42 = self.add_sublayer( sublayer=DepthwiseSeparable( - num_channels=int(256 * scale), - num_filters1=256, - num_filters2=512, + in_channels=int(256 * scale), + out_channels1=256, + out_channels2=512, num_groups=256, stride=2, scale=scale), @@ -209,9 +182,9 @@ class MobileNetV1(fluid.dygraph.Layer): for i in range(5): tmp = self.add_sublayer( sublayer=DepthwiseSeparable( - num_channels=int(512 * scale), - num_filters1=512, - num_filters2=512, + in_channels=int(512 * scale), + out_channels1=512, + out_channels2=512, num_groups=512, stride=1, scale=scale), @@ -220,9 +193,9 @@ class MobileNetV1(fluid.dygraph.Layer): dws56 = self.add_sublayer( sublayer=DepthwiseSeparable( - num_channels=int(512 * scale), - num_filters1=512, - num_filters2=1024, + in_channels=int(512 * scale), + out_channels1=512, + out_channels2=1024, num_groups=512, stride=2, scale=scale), @@ -231,9 +204,9 @@ class MobileNetV1(fluid.dygraph.Layer): dws6 = self.add_sublayer( sublayer=DepthwiseSeparable( - num_channels=int(1024 * scale), - num_filters1=1024, - num_filters2=1024, + in_channels=int(1024 * scale), + out_channels1=1024, + out_channels2=1024, num_groups=1024, stride=1, scale=scale), @@ -241,29 +214,23 @@ class MobileNetV1(fluid.dygraph.Layer): self.dwsl.append(dws6) if with_pool: - self.pool2d_avg = Pool2D(pool_type='avg', global_pooling=True) - - if num_classes > -1: - self.out = Linear( - int(1024 * scale), - num_classes, - act=classifier_activation, - param_attr=ParamAttr( - initializer=MSRA(), name=self.full_name() + "fc7_weights"), - bias_attr=ParamAttr(name="fc7_offset")) - - def forward(self, inputs): - y = self.conv1(inputs) + self.pool2d_avg = nn.AdaptiveAvgPool2d(1) + + if num_classes > 0: + self.fc = nn.Linear(int(1024 * scale), num_classes) + + def forward(self, x): + x = self.conv1(x) for dws in self.dwsl: - y = dws(y) + x = dws(x) if 
self.with_pool: - y = self.pool2d_avg(y) + x = self.pool2d_avg(x) if self.num_classes > 0: - y = fluid.layers.reshape(y, shape=[-1, 1024]) - y = self.out(y) - return y + x = paddle.flatten(x, 1) + x = self.fc(x) + return x def _mobilenet(arch, pretrained=False, **kwargs): @@ -275,7 +242,7 @@ def _mobilenet(arch, pretrained=False, **kwargs): model_urls[arch][1]) assert weight_path.endswith( '.pdparams'), "suffix of weight must be .pdparams" - param, _ = fluid.load_dygraph(weight_path) + param, _ = paddle.load(weight_path) model.load_dict(param) return model diff --git a/python/paddle/vision/models/mobilenetv2.py b/python/paddle/vision/models/mobilenetv2.py index c08fb88f8bd..bab8b7b2b1b 100644 --- a/python/paddle/vision/models/mobilenetv2.py +++ b/python/paddle/vision/models/mobilenetv2.py @@ -14,9 +14,9 @@ import numpy as np import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear + +import paddle.nn as nn +import paddle.nn.functional as F from paddle.utils.download import get_weights_path_from_url @@ -25,221 +25,166 @@ __all__ = ['MobileNetV2', 'mobilenet_v2'] model_urls = { 'mobilenetv2_1.0': ('https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams', - '8ff74f291f72533f2a7956a4efff9d88') + '0340af0a901346c8d46f4529882fb63d') } -class ConvBNLayer(fluid.dygraph.Layer): - def __init__(self, - num_channels, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - use_cudnn=True): - super(ConvBNLayer, self).__init__() - - tmp_param = ParamAttr(name=self.full_name() + "_weights") - self._conv = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - act=None, - use_cudnn=use_cudnn, - param_attr=tmp_param, - bias_attr=False) - - self._batch_norm = BatchNorm( - num_filters, - param_attr=ParamAttr(name=self.full_name() + "_bn" + 
"_scale"), - bias_attr=ParamAttr(name=self.full_name() + "_bn" + "_offset"), - moving_mean_name=self.full_name() + "_bn" + '_mean', - moving_variance_name=self.full_name() + "_bn" + '_variance') - - def forward(self, inputs, if_act=True): - y = self._conv(inputs) - y = self._batch_norm(y) - if if_act: - y = fluid.layers.relu6(y) - return y - - -class InvertedResidualUnit(fluid.dygraph.Layer): - def __init__( - self, - num_channels, - num_in_filter, - num_filters, - stride, - filter_size, - padding, - expansion_factor, ): - super(InvertedResidualUnit, self).__init__() - num_expfilter = int(round(num_in_filter * expansion_factor)) - self._expand_conv = ConvBNLayer( - num_channels=num_channels, - num_filters=num_expfilter, - filter_size=1, - stride=1, - padding=0, - num_groups=1) - - self._bottleneck_conv = ConvBNLayer( - num_channels=num_expfilter, - num_filters=num_expfilter, - filter_size=filter_size, - stride=stride, - padding=padding, - num_groups=num_expfilter, - use_cudnn=False) - - self._linear_conv = ConvBNLayer( - num_channels=num_expfilter, - num_filters=num_filters, - filter_size=1, - stride=1, - padding=0, - num_groups=1) - - def forward(self, inputs, ifshortcut): - y = self._expand_conv(inputs, if_act=True) - y = self._bottleneck_conv(y, if_act=True) - y = self._linear_conv(y, if_act=False) - if ifshortcut: - y = fluid.layers.elementwise_add(inputs, y) - return y - - -class InvresiBlocks(fluid.dygraph.Layer): - def __init__(self, in_c, t, c, n, s): - super(InvresiBlocks, self).__init__() - - self._first_block = InvertedResidualUnit( - num_channels=in_c, - num_in_filter=in_c, - num_filters=c, - stride=s, - filter_size=3, - padding=1, - expansion_factor=t) - - self._inv_blocks = [] - for i in range(1, n): - tmp = self.add_sublayer( - sublayer=InvertedResidualUnit( - num_channels=c, - num_in_filter=c, - num_filters=c, - stride=1, - filter_size=3, - padding=1, - expansion_factor=t), - name=self.full_name() + "_" + str(i + 1)) - self._inv_blocks.append(tmp) - 
- def forward(self, inputs): - y = self._first_block(inputs, ifshortcut=False) - for inv_block in self._inv_blocks: - y = inv_block(y, ifshortcut=True) - return y - - -class MobileNetV2(fluid.dygraph.Layer): - """MobileNetV2 model from - `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" `_. - - Args: - scale (float): scale of channels in each layer. Default: 1.0. - num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer - will not be defined. Default: 1000. - with_pool (bool): use pool before the last fc layer or not. Default: True. - classifier_activation (str): activation for the last fc layer. Default: 'softmax'. - - Examples: - .. code-block:: python +def _make_divisible(v, divisor, min_value=None): + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - from paddle.vision.models import MobileNetV2 + if new_v < 0.9 * v: + new_v += divisor + return new_v - model = MobileNetV2() - """ +class ConvBNReLU(nn.Sequential): + def __init__(self, + in_planes, + out_planes, + kernel_size=3, + stride=1, + groups=1, + norm_layer=nn.BatchNorm2d): + padding = (kernel_size - 1) // 2 + + super(ConvBNReLU, self).__init__( + nn.Conv2d( + in_planes, + out_planes, + kernel_size, + stride, + padding, + groups=groups, + bias_attr=False), + norm_layer(out_planes), + nn.ReLU6()) + + +class InvertedResidual(nn.Layer): def __init__(self, - scale=1.0, - num_classes=1000, - with_pool=True, - classifier_activation='softmax'): + inp, + oup, + stride, + expand_ratio, + norm_layer=nn.BatchNorm2d): + super(InvertedResidual, self).__init__() + self.stride = stride + assert stride in [1, 2] + + hidden_dim = int(round(inp * expand_ratio)) + self.use_res_connect = self.stride == 1 and inp == oup + + layers = [] + if expand_ratio != 1: + layers.append( + ConvBNReLU( + inp, hidden_dim, kernel_size=1, norm_layer=norm_layer)) + layers.extend([ + ConvBNReLU( + hidden_dim, + hidden_dim, + stride=stride, + 
groups=hidden_dim, + norm_layer=norm_layer), + nn.Conv2d( + hidden_dim, oup, 1, 1, 0, bias_attr=False), + norm_layer(oup), + ]) + self.conv = nn.Sequential(*layers) + + def forward(self, x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +class MobileNetV2(nn.Layer): + def __init__(self, scale=1.0, num_classes=1000, with_pool=True): + """MobileNetV2 model from + `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" `_. + + Args: + scale (float): scale of channels in each layer. Default: 1.0. + num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer + will not be defined. Default: 1000. + with_pool (bool): use pool before the last fc layer or not. Default: True. + + Examples: + .. code-block:: python + + from paddle.vision.models import MobileNetV2 + + model = MobileNetV2() + """ super(MobileNetV2, self).__init__() - self.scale = scale self.num_classes = num_classes self.with_pool = with_pool + input_channel = 32 + last_channel = 1280 + + block = InvertedResidual + round_nearest = 8 + norm_layer = nn.BatchNorm2d + inverted_residual_setting = [ + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + [6, 160, 3, 2], + [6, 320, 1, 1], + ] - bottleneck_params_list = [ - (1, 16, 1, 1), - (6, 24, 2, 2), - (6, 32, 3, 2), - (6, 64, 4, 2), - (6, 96, 3, 1), - (6, 160, 3, 2), - (6, 320, 1, 1), + input_channel = _make_divisible(input_channel * scale, round_nearest) + self.last_channel = _make_divisible(last_channel * max(1.0, scale), + round_nearest) + features = [ + ConvBNReLU( + 3, input_channel, stride=2, norm_layer=norm_layer) ] - self._conv1 = ConvBNLayer( - num_channels=3, - num_filters=int(32 * scale), - filter_size=3, - stride=2, - padding=1) - - self._invl = [] - i = 1 - in_c = int(32 * scale) - for layer_setting in bottleneck_params_list: - t, c, n, s = layer_setting - i += 1 - tmp = self.add_sublayer( - sublayer=InvresiBlocks( - in_c=in_c, t=t, c=int(c * scale), n=n, 
s=s), - name='conv' + str(i)) - self._invl.append(tmp) - in_c = int(c * scale) - - self._out_c = int(1280 * scale) if scale > 1.0 else 1280 - self._conv9 = ConvBNLayer( - num_channels=in_c, - num_filters=self._out_c, - filter_size=1, - stride=1, - padding=0) + for t, c, n, s in inverted_residual_setting: + output_channel = _make_divisible(c * scale, round_nearest) + for i in range(n): + stride = s if i == 0 else 1 + features.append( + block( + input_channel, + output_channel, + stride, + expand_ratio=t, + norm_layer=norm_layer)) + input_channel = output_channel + + features.append( + ConvBNReLU( + input_channel, + self.last_channel, + kernel_size=1, + norm_layer=norm_layer)) + + self.features = nn.Sequential(*features) if with_pool: - self._pool2d_avg = Pool2D(pool_type='avg', global_pooling=True) - - if num_classes > 0: - tmp_param = ParamAttr(name=self.full_name() + "fc10_weights") - self._fc = Linear( - self._out_c, - num_classes, - act=classifier_activation, - param_attr=tmp_param, - bias_attr=ParamAttr(name="fc10_offset")) - - def forward(self, inputs): - y = self._conv1(inputs, if_act=True) - for inv in self._invl: - y = inv(y) - y = self._conv9(y, if_act=True) + self.pool2d_avg = nn.AdaptiveAvgPool2d(1) + + if self.num_classes > 0: + self.classifier = nn.Sequential( + nn.Dropout(0.2), nn.Linear(self.last_channel, num_classes)) + + def forward(self, x): + x = self.features(x) if self.with_pool: - y = self._pool2d_avg(y) + x = self.pool2d_avg(x) + if self.num_classes > 0: - y = fluid.layers.reshape(y, shape=[-1, self._out_c]) - y = self._fc(y) - return y + x = paddle.flatten(x, 1) + x = self.classifier(x) + return x def _mobilenet(arch, pretrained=False, **kwargs): @@ -251,7 +196,7 @@ def _mobilenet(arch, pretrained=False, **kwargs): model_urls[arch][1]) assert weight_path.endswith( '.pdparams'), "suffix of weight must be .pdparams" - param, _ = fluid.load_dygraph(weight_path) + param, _ = paddle.load(weight_path) model.load_dict(param) return model diff --git 
a/python/paddle/vision/models/resnet.py b/python/paddle/vision/models/resnet.py index da0c3e9eb3f..f9e00aefd6b 100644 --- a/python/paddle/vision/models/resnet.py +++ b/python/paddle/vision/models/resnet.py @@ -15,11 +15,8 @@ from __future__ import division from __future__ import print_function -import math -import paddle.fluid as fluid - -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear -from paddle.fluid.dygraph.container import Sequential +import paddle +import paddle.nn as nn from paddle.utils.download import get_weights_path_from_url @@ -29,143 +26,129 @@ __all__ = [ model_urls = { 'resnet18': ('https://paddle-hapi.bj.bcebos.com/models/resnet18.pdparams', - '0ba53eea9bc970962d0ef96f7b94057e'), + 'cf548f46534aa3560945be4b95cd11c4'), 'resnet34': ('https://paddle-hapi.bj.bcebos.com/models/resnet34.pdparams', - '46bc9f7c3dd2e55b7866285bee91eff3'), + '8d2275cf8706028345f78ac0e1d31969'), 'resnet50': ('https://paddle-hapi.bj.bcebos.com/models/resnet50.pdparams', - '5ce890a9ad386df17cf7fe2313dca0a1'), + 'ca6f485ee1ab0492d38f323885b0ad80'), 'resnet101': ('https://paddle-hapi.bj.bcebos.com/models/resnet101.pdparams', - 'fb07a451df331e4b0bb861ed97c3a9b9'), + '02f35f034ca3858e1e54d4036443c92d'), 'resnet152': ('https://paddle-hapi.bj.bcebos.com/models/resnet152.pdparams', - 'f9c700f26d3644bb76ad2226ed5f5713'), + '7ad16a2f1e7333859ff986138630fd7a'), } -class ConvBNLayer(fluid.dygraph.Layer): +class BasicBlock(nn.Layer): + expansion = 1 + def __init__(self, - num_channels, - num_filters, - filter_size, + inplanes, + planes, stride=1, + downsample=None, groups=1, - act=None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - bias_attr=False) - - self._batch_norm = BatchNorm(num_filters, act=act) - - def forward(self, inputs): - x = self._conv(inputs) - x = self._batch_norm(x) - - 
return x - - -class BasicBlock(fluid.dygraph.Layer): - """residual block of resnet18 and resnet34 - """ - expansion = 1 - - def __init__(self, num_channels, num_filters, stride, shortcut=True): + base_width=64, + dilation=1, + norm_layer=None): super(BasicBlock, self).__init__() + if norm_layer is None: + norm_layer = nn.BatchNorm2d - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=3, - act='relu') - self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu') + if dilation > 1: + raise NotImplementedError( + "Dilation > 1 not supported in BasicBlock") - if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - stride=stride) + self.conv1 = nn.Conv2d( + inplanes, planes, 3, padding=1, stride=stride, bias_attr=False) + self.bn1 = norm_layer(planes) + self.relu = nn.ReLU() + self.conv2 = nn.Conv2d(planes, planes, 3, padding=1, bias_attr=False) + self.bn2 = norm_layer(planes) + self.downsample = downsample + self.stride = stride - self.shortcut = shortcut + def forward(self, x): + identity = x - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) - if self.shortcut: - short = inputs - else: - short = self.short(inputs) + out = self.conv2(out) + out = self.bn2(out) - y = short + conv1 + if self.downsample is not None: + identity = self.downsample(x) - return fluid.layers.relu(y) + out += identity + out = self.relu(out) + return out -class BottleneckBlock(fluid.dygraph.Layer): - """residual block of resnet50, resnet101 amd resnet152 - """ + +class BottleneckBlock(nn.Layer): expansion = 4 - def __init__(self, num_channels, num_filters, stride, shortcut=True): + def __init__(self, + inplanes, + planes, + stride=1, + downsample=None, + groups=1, + base_width=64, + dilation=1, + norm_layer=None): 
super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu') - self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, + if norm_layer is None: + norm_layer = nn.BatchNorm2d + width = int(planes * (base_width / 64.)) * groups + + self.conv1 = nn.Conv2d(inplanes, width, 1, bias_attr=False) + self.bn1 = norm_layer(width) + + self.conv2 = nn.Conv2d( + width, + width, + 3, + padding=dilation, stride=stride, - act='relu') - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters * self.expansion, - filter_size=1, - act=None) + groups=groups, + dilation=dilation, + bias_attr=False) + self.bn2 = norm_layer(width) - if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters * self.expansion, - filter_size=1, - stride=stride) + self.conv3 = nn.Conv2d( + width, planes * self.expansion, 1, bias_attr=False) + self.bn3 = norm_layer(planes * self.expansion) + self.relu = nn.ReLU() + self.downsample = downsample + self.stride = stride - self.shortcut = shortcut + def forward(self, x): + identity = x - self._num_channels_out = num_filters * self.expansion + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) - def forward(self, inputs): - x = self.conv0(inputs) - conv1 = self.conv1(x) - conv2 = self.conv2(conv1) + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) - if self.shortcut: - short = inputs - else: - short = self.short(inputs) + out = self.conv3(out) + out = self.bn3(out) - x = fluid.layers.elementwise_add(x=short, y=conv2) + if self.downsample is not None: + identity = self.downsample(x) - return fluid.layers.relu(x) + out += identity + out = self.relu(out) + return out -class ResNet(fluid.dygraph.Layer): + +class ResNet(nn.Layer): """ResNet model from `"Deep Residual Learning for Image Recognition" `_ @@ -175,7 +158,6 @@ class 
ResNet(fluid.dygraph.Layer): num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer will not be defined. Default: 1000. with_pool (bool): use pool before the last fc layer or not. Default: True. - classifier_activation (str): activation for the last fc layer. Default: 'softmax'. Examples: .. code-block:: python @@ -189,82 +171,87 @@ class ResNet(fluid.dygraph.Layer): """ - def __init__(self, - Block, - depth=50, - num_classes=1000, - with_pool=True, - classifier_activation='softmax'): + def __init__(self, block, depth, num_classes=1000, with_pool=True): super(ResNet, self).__init__() - - self.num_classes = num_classes - self.with_pool = with_pool - - layer_config = { + layer_cfg = { 18: [2, 2, 2, 2], 34: [3, 4, 6, 3], 50: [3, 4, 6, 3], 101: [3, 4, 23, 3], - 152: [3, 8, 36, 3], + 152: [3, 8, 36, 3] } - assert depth in layer_config.keys(), \ - "supported depth are {} but input layer is {}".format( - layer_config.keys(), depth) - - layers = layer_config[depth] - - in_channels = 64 - out_channels = [64, 128, 256, 512] - - self.conv = ConvBNLayer( - num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu') - self.pool = Pool2D( - pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') - - self.layers = [] - for idx, num_blocks in enumerate(layers): - blocks = [] - shortcut = False - for b in range(num_blocks): - if b == 1: - in_channels = out_channels[idx] * Block.expansion - block = Block( - num_channels=in_channels, - num_filters=out_channels[idx], - stride=2 if b == 0 and idx != 0 else 1, - shortcut=shortcut) - blocks.append(block) - shortcut = True - layer = self.add_sublayer("layer_{}".format(idx), - Sequential(*blocks)) - self.layers.append(layer) + layers = layer_cfg[depth] + self.num_classes = num_classes + self.with_pool = with_pool + self._norm_layer = nn.BatchNorm2d + + self.inplanes = 64 + self.dilation = 1 + self.conv1 = nn.Conv2d( + 3, + self.inplanes, + kernel_size=7, + stride=2, + padding=3, + bias_attr=False) + 
self.bn1 = self._norm_layer(self.inplanes) + self.relu = nn.ReLU() + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) if with_pool: - self.global_pool = Pool2D( - pool_size=7, pool_type='avg', global_pooling=True) + self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) if num_classes > 0: - stdv = 1.0 / math.sqrt(out_channels[-1] * Block.expansion * 1.0) - self.fc_input_dim = out_channels[-1] * Block.expansion * 1 * 1 - self.fc = Linear( - self.fc_input_dim, - num_classes, - act=classifier_activation, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - - def forward(self, inputs): - x = self.conv(inputs) - x = self.pool(x) - for layer in self.layers: - x = layer(x) - - if self.with_pool: - x = self.global_pool(x) - - if self.num_classes > -1: - x = fluid.layers.reshape(x, shape=[-1, self.fc_input_dim]) + self.fc = nn.Linear(512 * block.expansion, num_classes) + + def _make_layer(self, block, planes, blocks, stride=1, dilate=False): + norm_layer = self._norm_layer + downsample = None + previous_dilation = self.dilation + if dilate: + self.dilation *= stride + stride = 1 + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + self.inplanes, + planes * block.expansion, + 1, + stride=stride, + bias_attr=False), + norm_layer(planes * block.expansion), ) + + layers = [] + layers.append( + block(self.inplanes, planes, stride, downsample, 1, 64, + previous_dilation, norm_layer)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes, norm_layer=norm_layer)) + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = 
self.relu(x) + x = self.maxpool(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + if self.with_pool > 0: + x = self.avgpool(x) + + if self.num_classes > 0: + x = paddle.flatten(x, 1) x = self.fc(x) + return x @@ -277,7 +264,7 @@ def _resnet(arch, Block, depth, pretrained, **kwargs): model_urls[arch][1]) assert weight_path.endswith( '.pdparams'), "suffix of weight must be .pdparams" - param, _ = fluid.load_dygraph(weight_path) + param, _ = paddle.load(weight_path) model.set_dict(param) return model diff --git a/python/paddle/vision/models/vgg.py b/python/paddle/vision/models/vgg.py index 8bfacda2476..d11845b6616 100644 --- a/python/paddle/vision/models/vgg.py +++ b/python/paddle/vision/models/vgg.py @@ -12,9 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid as fluid -from paddle.nn import Conv2d, Pool2D, BatchNorm, Linear, ReLU, Softmax -from paddle.fluid.dygraph.container import Sequential +import paddle +import paddle.nn as nn from paddle.utils.download import get_weights_path_from_url @@ -28,39 +27,18 @@ __all__ = [ model_urls = { 'vgg16': ('https://paddle-hapi.bj.bcebos.com/models/vgg16.pdparams', - 'c788f453a3b999063e8da043456281ee') + '89bbffc0f87d260be9b8cdc169c991c4') } -class Classifier(fluid.dygraph.Layer): - def __init__(self, num_classes, classifier_activation='softmax'): - super(Classifier, self).__init__() - self.linear1 = Linear(512 * 7 * 7, 4096) - self.linear2 = Linear(4096, 4096) - self.linear3 = Linear(4096, num_classes) - self.act = Softmax() #Todo: accept any activation - - def forward(self, x): - x = self.linear1(x) - x = fluid.layers.relu(x) - x = fluid.layers.dropout(x, 0.5) - x = self.linear2(x) - x = fluid.layers.relu(x) - x = fluid.layers.dropout(x, 0.5) - x = self.linear3(x) - out = self.act(x) - return out - - -class VGG(fluid.dygraph.Layer): +class VGG(nn.Layer): """VGG model from `"Very Deep Convolutional Networks 
For Large-Scale Image Recognition" `_ Args: - features (fluid.dygraph.Layer): vgg features create by function make_layers. + features (nn.Layer): vgg features create by function make_layers. num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer will not be defined. Default: 1000. - classifier_activation (str): activation for the last fc layer. Default: 'softmax'. Examples: .. code-block:: python @@ -76,44 +54,41 @@ class VGG(fluid.dygraph.Layer): """ - def __init__(self, - features, - num_classes=1000, - classifier_activation='softmax'): + def __init__(self, features, num_classes=1000): super(VGG, self).__init__() self.features = features - self.num_classes = num_classes - - if num_classes > 0: - classifier = Classifier(num_classes, classifier_activation) - self.classifier = self.add_sublayer("classifier", - Sequential(classifier)) + self.avgpool = nn.AdaptiveAvgPool2d((7, 7)) + self.classifier = nn.Sequential( + nn.Linear(512 * 7 * 7, 4096), + nn.ReLU(), + nn.Dropout(), + nn.Linear(4096, 4096), + nn.ReLU(), + nn.Dropout(), + nn.Linear(4096, num_classes), ) def forward(self, x): x = self.features(x) - - if self.num_classes > 0: - x = fluid.layers.flatten(x, 1) - x = self.classifier(x) + x = self.avgpool(x) + x = paddle.flatten(x, 1) + x = self.classifier(x) return x def make_layers(cfg, batch_norm=False): layers = [] in_channels = 3 - for v in cfg: if v == 'M': - layers += [Pool2D(pool_size=2, pool_stride=2)] + layers += [nn.MaxPool2d(kernel_size=2, stride=2)] else: + conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) if batch_norm: - conv2d = Conv2d(in_channels, v, kernel_size=3, padding=1) - layers += [conv2d, BatchNorm(v), ReLU()] + layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU()] else: - conv2d = Conv2d(in_channels, v, kernel_size=3, padding=1) - layers += [conv2d, ReLU()] + layers += [conv2d, nn.ReLU()] in_channels = v - return Sequential(*layers) + return nn.Sequential(*layers) cfgs = { @@ -144,7 +119,7 @@ def _vgg(arch, 
cfg, batch_norm, pretrained, **kwargs): model_urls[arch][1]) assert weight_path.endswith( '.pdparams'), "suffix of weight must be .pdparams" - param, _ = fluid.load_dygraph(weight_path) + param, _ = paddle.load(weight_path) model.load_dict(param) return model -- GitLab From b0ee1405f74e0c598f84694f91cfd331a1ab10ca Mon Sep 17 00:00:00 2001 From: "joanna.wozna.intel" Date: Sat, 26 Sep 2020 07:32:39 +0200 Subject: [PATCH 105/117] Add conv2d bfloat16 support (#27325) --- .../framework/ir/graph_pattern_detector.cc | 3 +- paddle/fluid/operators/conv_op.cc | 8 +- .../fluid/operators/mkldnn/conv_mkldnn_op.cc | 54 +++-- .../operators/mkldnn/dequantize_mkldnn_op.cc | 3 +- paddle/fluid/pybind/tensor_py.h | 18 ++ python/paddle/fluid/framework.py | 4 +- .../mkldnn/test_conv2d_bf16_mkldnn_op.py | 208 ++++++++++++++++++ .../mkldnn/test_conv2d_int8_mkldnn_op.py | 4 +- .../mkldnn/test_dequantize_mkldnn_op.py | 13 +- .../paddle/fluid/tests/unittests/op_test.py | 36 ++- 10 files changed, 323 insertions(+), 28 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_bf16_mkldnn_op.py diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index 9c1eaa99a3c..96952e20c21 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -1894,8 +1894,7 @@ PDNode *patterns::QuantizePlacement::operator()( PDNode *patterns::Bfloat16Placement::operator()( const std::unordered_set &bfloat16_enabled_op_types) { - std::unordered_set supported_op_types = - std::unordered_set(); + std::unordered_set supported_op_types{"conv2d"}; if (!bfloat16_enabled_op_types.empty()) { supported_op_types = bfloat16_enabled_op_types; } diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc index bf97b9d03c4..ef8a2b38f20 100644 --- a/paddle/fluid/operators/conv_op.cc +++ b/paddle/fluid/operators/conv_op.cc @@ -166,7 +166,8 @@ 
framework::OpKernelType ConvOp::GetExpectedKernelType( #endif if (input_data_type != framework::proto::VarType::INT8 && - input_data_type != framework::proto::VarType::UINT8) { + input_data_type != framework::proto::VarType::UINT8 && + input_data_type != framework::proto::VarType::BF16) { auto filter_data_type = ctx.Input("Filter")->type(); PADDLE_ENFORCE_EQ(input_data_type, filter_data_type, platform::errors::InvalidArgument( @@ -455,6 +456,11 @@ void Conv3DOpMaker::Make() { AddAttr("use_mkldnn", "(bool, default false) Only used in mkldnn kernel") .SetDefault(false); + AddAttr( + "mkldnn_data_type", + "(string, default \"float32\"). Data type of mkldnn kernel") + .SetDefault("float32") + .InEnum({"float32", "int8", "bfloat16"}); AddAttr("fuse_relu", "(bool, default false) Only used in mkldnn kernel") .SetDefault(false); AddAttr("fuse_activation", diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc index a6cda154e55..7a4e11091fd 100644 --- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc @@ -55,12 +55,12 @@ inline MKLDNNMemoryFormat GetWeightsFormat(const MKLDNNMemoryFormat format, } } -static mkldnn::memory::data_type GetDstType(bool is_int8, +static mkldnn::memory::data_type GetDstType(bool is_int8, bool is_bfloat16, bool force_fp32_output, std::string fuse_activation, bool fuse_residual_conn, const Tensor* residual_param) { - auto dst_dt = mkldnn::memory::data_type::f32; // uint8_t, int8_t, float + auto dst_dt = mkldnn::memory::data_type::f32; if (is_int8) { dst_dt = (fuse_activation == "relu" || fuse_activation == "relu6") ? 
mkldnn::memory::data_type::u8 @@ -72,6 +72,13 @@ static mkldnn::memory::data_type GetDstType(bool is_int8, auto residual_dt = framework::ToMKLDNNDataType(residual_param->type()); if (dst_dt != residual_dt) dst_dt = residual_dt; } + } else { + if (!force_fp32_output && is_bfloat16) { + dst_dt = mkldnn::memory::data_type::bf16; + if (fuse_residual_conn && residual_param) { + dst_dt = framework::ToMKLDNNDataType(residual_param->type()); + } + } } return dst_dt; } @@ -224,12 +231,15 @@ class ConvMKLDNNHandlerT src_tz.size(), chosen_memory_format); } } - - const auto src_md = platform::MKLDNNMemDesc( - src_tz, platform::MKLDNNGetDataType(), chosen_memory_format); - const auto weights_md = - platform::MKLDNNMemDesc(weights_tz, platform::MKLDNNGetDataType(), - MKLDNNMemoryFormat::any); + auto data_type = mkldnn::memory::data_type::f32; + if (ctx.Attr("mkldnn_data_type") == "bfloat16" || + std::is_same::value) + data_type = mkldnn::memory::data_type::bf16; + + const auto src_md = + platform::MKLDNNMemDesc(src_tz, data_type, chosen_memory_format); + const auto weights_md = platform::MKLDNNMemDesc(weights_tz, data_type, + MKLDNNMemoryFormat::any); const auto dst_md = platform::MKLDNNMemDesc( dst_tz, platform::MKLDNNGetDataType(), chosen_memory_format); @@ -241,8 +251,8 @@ class ConvMKLDNNHandlerT if (bias) { auto bias_tz = framework::vectorize(bias->dims()); - auto bias_md = platform::MKLDNNMemDesc( - bias_tz, platform::MKLDNNGetDataType(), MKLDNNMemoryFormat::x); + auto bias_md = + platform::MKLDNNMemDesc(bias_tz, data_type, MKLDNNMemoryFormat::x); this->AcquireForwardPrimitiveDescriptor( conv_attr, fwd_prop_kind, dnnl::algorithm::convolution_direct, @@ -384,15 +394,21 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { "Operator DNNL Conv must use CPUPlace")); bool is_INT8 = std::is_same::value || std::is_same::value; + bool is_BFLOAT16 = ctx.Attr("mkldnn_data_type") == "bfloat16"; + auto residual_param = ctx.Input("ResidualData"); + bool fuse_residual_conn 
= ctx.Attr("fuse_residual_connection"); + std::string fuse_activation = ctx.Attr("fuse_activation"); + bool force_fp32_output = ctx.Attr("force_fp32_output"); + auto dst_dt = + GetDstType(is_INT8, is_BFLOAT16, force_fp32_output, fuse_activation, + fuse_residual_conn, residual_param); if (!is_INT8) { - ComputeFP32(ctx); + if (dst_dt == mkldnn::memory::data_type::f32) { + ComputeFP32(ctx); + } else if (dst_dt == mkldnn::memory::data_type::bf16) { + ComputeFP32(ctx); + } } else { - std::string fuse_activation = ctx.Attr("fuse_activation"); - bool fuse_residual_conn = ctx.Attr("fuse_residual_connection"); - bool force_fp32_output = ctx.Attr("force_fp32_output"); - auto residual_param = ctx.Input("ResidualData"); - auto dst_dt = GetDstType(true, force_fp32_output, fuse_activation, - fuse_residual_conn, residual_param); if (dst_dt == mkldnn::memory::data_type::f32) { ComputeINT8(ctx); } else if (dst_dt == mkldnn::memory::data_type::u8) { @@ -1103,6 +1119,10 @@ REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, ops::kConvMKLDNNFP32, ops::ConvMKLDNNOpKernel); +REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE( + conv2d, MKLDNN, ::paddle::platform::CPUPlace, BF16, ops::kConvMKLDNNFP32, + ops::ConvMKLDNNOpKernel); + REGISTER_OP_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, ::paddle::platform::CPUPlace, U8, ops::kConvMKLDNNINT8, diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc index 540642c7140..70d4c34d9c5 100644 --- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc @@ -110,4 +110,5 @@ class DeQuantOpKernel : public framework::OpKernel { namespace ops = paddle::operators; REGISTER_OP_KERNEL(dequantize, MKLDNN, ::paddle::platform::CPUPlace, - ops::DeQuantOpKernel, ops::DeQuantOpKernel); + ops::DeQuantOpKernel, ops::DeQuantOpKernel, + ops::DeQuantOpKernel); diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index 
5ee15073267..142ab2bb9d7 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -41,6 +41,7 @@ namespace detail { // import numpy as np // print np.dtype(np.float16).num # 23 constexpr int NPY_FLOAT16_ = 23; +constexpr int NPY_UINT16_ = 4; // Note: Since float16 is not a builtin type in C++, we register // paddle::platform::float16 as numpy.float16. @@ -60,6 +61,23 @@ struct npy_format_descriptor { static PYBIND11_DESCR name() { return _("float16"); } }; +// Note: Since bfloat16 is not a builtin type in C++ and in numpy, +// we register paddle::platform::bfloat16 as numpy.uint16. +template <> +struct npy_format_descriptor { + static py::dtype dtype() { + handle ptr = npy_api::get().PyArray_DescrFromType_(NPY_UINT16_); + return reinterpret_borrow(ptr); + } + static std::string format() { + // Note: "H" represents UINT16. + // Details at: + // https://docs.python.org/3/library/struct.html#format-characters. + return "H"; + } + static PYBIND11_DESCR name() { return _("bfloat16"); } +}; + } // namespace detail } // namespace pybind11 diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index c7e66bb2877..b4cea6761dc 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -613,7 +613,9 @@ def convert_np_dtype_to_dtype_(np_dtype): elif dtype == np.bool: return core.VarDesc.VarType.BOOL elif dtype == np.uint16: - return core.VarDesc.VarType.INT16 + # since there is still no support for bfloat16 in NumPy, + # uint16 is used for casting bfloat16 + return core.VarDesc.VarType.BF16 elif dtype == np.uint8: return core.VarDesc.VarType.UINT8 elif dtype == np.int8: diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_bf16_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_bf16_mkldnn_op.py new file mode 100644 index 00000000000..0ac33383fb2 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_bf16_mkldnn_op.py @@ -0,0 +1,208 @@ +# 
Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +import struct + +import paddle.fluid.core as core +from paddle.fluid.tests.unittests.op_test import OpTest, skip_check_grad_ci, convert_float_to_uint16 +from paddle.fluid.tests.unittests.test_conv2d_op import conv2d_forward_naive, TestConv2dOp + + +def conv2d_forward_refer(input, filter, group, conv_param): + out, in_n, out_h, out_w, out_c = conv2d_forward_naive(input, filter, group, + conv_param) + return out + + +def conv2d_residual_naive(out, residual): + assert out.shape == residual.shape + out = np.add(out, residual) + return out + + +class TestConv2dBf16Op(TestConv2dOp): + def setUp(self): + self.op_type = "conv2d" + self.use_cudnn = False + self.exhaustive_search = False + self.use_cuda = False + self.use_mkldnn = True + self.weight_type = np.float32 + self.input_type = np.float32 + self.use_mkldnn = True + self.mkldnn_data_type = "bfloat16" + self.force_fp32_output = False + self.init_group() + self.init_dilation() + self.init_test_case() + self.init_fuse_relu() + self.init_fuse_residual() + self.init_data_type() + self.init_force_fp32_output() + + conv2d_param = { + 'stride': self.stride, + 'pad': self.pad, + 'dilation': self.dilations + } + self.input = np.random.random(self.input_size).astype(np.float32) + self.filter = 
np.random.random(self.filter_size).astype(np.float32) + conv_out, _, _, _, _ = conv2d_forward_naive(self.input, self.filter, + self.groups, conv2d_param) + self.conv_output_float = conv_out + + if self.fuse_residual: + self.input_residual = np.random.random( + self.input_residual_size).astype(np.float32) + self.conv_output_float = conv2d_residual_naive( + self.conv_output_float, self.input_residual) + self.conv_output = convert_float_to_uint16(self.conv_output_float) + self.outputs = {'Output': self.conv_output} + elif self.force_fp32_output: + self.outputs = {'Output': self.conv_output_float.astype(np.float32)} + + if self.input_type is not np.float32: + self.input = convert_float_to_uint16(self.input) + + self.inputs = { + 'Input': self.input.view(self.input_type), + 'Filter': OpTest.np_dtype_to_fluid_dtype( + self.filter.astype(self.weight_type)) + } + + if self.fuse_residual: + self.inputs['ResidualData'] = OpTest.np_dtype_to_fluid_dtype( + convert_float_to_uint16(self.input_residual)) + + self.attrs = { + 'strides': self.stride, + 'paddings': self.pad, + 'groups': self.groups, + 'dilations': self.dilations, + 'use_cudnn': self.use_cudnn, + 'use_mkldnn': self.use_mkldnn, + 'mkldnn_data_type': self.mkldnn_data_type, + 'force_fp32_output': self.force_fp32_output, + 'fuse_residual_connection': self.fuse_residual + } + + def test_check_output(self): + self.check_output_with_place(core.CPUPlace()) + + def test_check_grad(self): + pass + + def test_check_grad_no_filter(self): + pass + + def test_check_grad_no_input(self): + pass + + def init_test_case(self): + TestConv2dOp.init_test_case(self) + self.input_size = [1, 1, 5, 5] # NCHW + f_c = self.input_size[1] // self.groups + self.input_residual_size = [1, 2, 3, 3] + self.filter_size = [2, f_c, 3, 3] + + def init_data_type(self): + self.weight_type = np.float32 + self.input_type = np.float32 + + def init_force_fp32_output(self): + self.force_fp32_output = False + + def init_fuse_relu(self): + self.fuse_activation = 
"relu" + + def init_fuse_residual(self): + self.fuse_residual = True + + +class TestConv2d(TestConv2dBf16Op): + def init_test_case(self): + self.pad = [0, 0] + self.stride = [1, 1] + self.input_size = [2, 3, 5, 5] # NCHW + self.input_residual_size = [2, 6, 3, 3] + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 3, 3] + + def init_data_type(self): + self.input_type = np.uint16 + + +class TestWithPad(TestConv2d): + def init_test_case(self): + TestConv2d.init_test_case(self) + self.pad = [1, 1] + self.input_residual_size = [2, 6, 5, 5] + + +class TestWithGroup(TestConv2d): + def init_group(self): + self.groups = 3 + + +class TestWithStride(TestConv2dBf16Op): + def init_test_case(self): + self.pad = [1, 1] + self.stride = [2, 2] + self.input_size = [2, 3, 6, 6] + self.input_residual_size = [2, 6, 3, 3] + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 3, 3] + + def init_data_type(self): + self.input_type = np.uint16 + + +class TestWith1x1ForceFP32Output(TestConv2dBf16Op): + def init_test_case(self): + self.pad = [0, 0] + self.stride = [1, 1] + self.input_size = [1, 3, 5, 5] + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 1, 1] + + def init_force_fp32_output(self): + self.force_fp32_output = True + + def init_fuse_residual(self): + self.fuse_residual = False + + +class TestWithInput1x1Filter1x1(TestConv2dBf16Op): + def init_test_case(self): + self.pad = [0, 0] + self.stride = [1, 1] + self.input_size = [2, 3, 1, 1] + self.input_residual_size = [2, 6, 1, 1] + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] // self.groups + self.filter_size = [6, f_c, 1, 1] + + def init_group(self): + self.groups = 3 + + +if __name__ == '__main__': + unittest.main() diff --git 
a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py index 7a494e3c2c3..9731efced69 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_conv2d_int8_mkldnn_op.py @@ -36,6 +36,7 @@ class TestConv2dInt8Op(TestConv2dOp): self.use_cuda = False self.use_mkldnn = False self.data_format = "NCHW" + self.mkldnn_data_type = "int8" self.weighttype = np.float32 self.use_mkldnn = True self.init_group() @@ -141,7 +142,8 @@ class TestConv2dInt8Op(TestConv2dOp): 'Scale_weights': self.scale_weights, 'Scale_in_eltwise': self.scale_in_eltwise, 'fuse_activation': self.fuse_activation, - 'fuse_residual_connection': self.fuse_residual + 'fuse_residual_connection': self.fuse_residual, + 'mkldnn_data_type': self.mkldnn_data_type } self.outputs = {'Output': output} diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py index 35419462909..70c76f1fb71 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py @@ -16,7 +16,7 @@ from __future__ import print_function import unittest import numpy as np -from paddle.fluid.tests.unittests.op_test import OpTest +from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16 class TestDeQuantizeOp(OpTest): @@ -32,6 +32,9 @@ class TestDeQuantizeOp(OpTest): input = (np.random.randint(0, 100, self.input_size) - 50 ).astype(self.data_type) output = (input * (1 / self.scale)).astype('float') + elif self.data_type == 'uint16': + output = np.random.random(self.input_size).astype(np.float32) + input = convert_float_to_uint16(output) else: input = (np.random.randint(0, 100, self.input_size)).astype(self.data_type) @@ -70,5 +73,13 @@ class 
TestDeQuantizeOp2(TestDeQuantizeOp): self.data_type = 'uint8' +class TestDeQuantizeOpBf16(TestDeQuantizeOp): + def set_scale(self): + self.scale = 1.0 + + def set_data_type(self): + self.data_type = 'uint16' + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index a6a4b9574c5..d02fdafe995 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -20,6 +20,7 @@ import warnings import numpy as np import random import six +import struct import time import itertools import collections @@ -167,6 +168,18 @@ def skip_check_grad_ci(reason=None): return wrapper +def copy_bits_from_float_to_uint16(f): + return struct.unpack('> 16 + + +def convert_float_to_uint16(float_list): + new_output = [] + for x in np.nditer(float_list): + new_output.append(np.uint16(copy_bits_from_float_to_uint16(x))) + + return np.reshape(new_output, float_list.shape).view(np.uint16) + + class OpTest(unittest.TestCase): @classmethod def setUpClass(cls): @@ -242,6 +255,11 @@ class OpTest(unittest.TestCase): self.call_once = True self.dtype = data_type + def is_bfloat16_op(self): + return self.dtype == np.uint16 or ( + hasattr(self, 'mkldnn_data_type') and + getattr(self, 'mkldnn_data_type') is "bfloat16") + def infer_dtype_from_inputs_outputs(self, inputs, outputs): def is_np_data(input): return isinstance(input, (np.ndarray, np.generic)) @@ -276,8 +294,9 @@ class OpTest(unittest.TestCase): infer_dtype(inputs, dtype_set) dtype_list = [ np.dtype(np.float64), np.dtype(np.float32), np.dtype(np.float16), - np.dtype(np.int64), np.dtype(np.int32), np.dtype(np.int16), - np.dtype(np.int8), np.dtype(np.uint8), np.dtype(np.bool) + np.dtype(np.int64), np.dtype(np.int32), np.dtype(np.uint16), + np.dtype(np.int16), np.dtype(np.int8), np.dtype(np.uint8), + np.dtype(np.bool) ] # check the dtype in dtype_list in order, select the first dtype that in 
dtype_set for dtype in dtype_list: @@ -957,6 +976,14 @@ class OpTest(unittest.TestCase): self.op_type not in op_threshold_white_list.NEED_FIX_FP64_CHECK_OUTPUT_THRESHOLD_OP_LIST: atol = 0 + if self.is_bfloat16_op(): + check_dygraph = False + if hasattr(self, 'force_fp32_output') and getattr( + self, 'force_fp32_output'): + atol = 1e-2 + else: + atol = 2 + if no_check_set is not None: if self.op_type not in no_check_set_white_list.no_check_set_white_list: raise AssertionError( @@ -1286,8 +1313,9 @@ class OpTest(unittest.TestCase): no_grad_set = set() else: if (self.op_type not in no_grad_set_white_list.NEED_TO_FIX_OP_LIST - ) and (self.op_type not in - no_grad_set_white_list.NOT_CHECK_OP_LIST): + ) and ( + self.op_type not in no_grad_set_white_list.NOT_CHECK_OP_LIST + ) and (not self.is_bfloat16_op()): raise AssertionError("no_grad_set must be None, op_type is " + self.op_type + " Op.") -- GitLab From ecfdfc9c58d7bcd6e70d0964a469250724359c0b Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Sat, 26 Sep 2020 18:04:35 +0800 Subject: [PATCH 106/117] fix guard place set error (#27573) --- python/paddle/fluid/dygraph/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py index 01c2f0fed49..69fb23383e5 100644 --- a/python/paddle/fluid/dygraph/base.py +++ b/python/paddle/fluid/dygraph/base.py @@ -363,7 +363,7 @@ def guard(place=None): with framework.program_guard(train, startup): with framework.unique_name.guard(): with framework._dygraph_guard(tracer): - with framework._dygraph_place_guard(place): + with framework._dygraph_place_guard(expected_place): yield -- GitLab From a85592bcbf837c6d33c528e1dfea380ed6912d42 Mon Sep 17 00:00:00 2001 From: Zhong Hui Date: Sat, 26 Sep 2020 19:43:52 +0800 Subject: [PATCH 107/117] fix cpplint error for the autmic max/min fix cpplint error for the autmic max/min --- .../fluid/operators/math/segment_pooling.cu | 17 +++++++------ 
paddle/fluid/platform/cuda_primitives.h | 24 +++++++++---------- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/paddle/fluid/operators/math/segment_pooling.cu b/paddle/fluid/operators/math/segment_pooling.cu index bb2b6db100b..37155fa184e 100644 --- a/paddle/fluid/operators/math/segment_pooling.cu +++ b/paddle/fluid/operators/math/segment_pooling.cu @@ -12,13 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/elementwise/elementwise_div_op.h" +#include #include "paddle/fluid/operators/gather.cu.h" #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/segment_pooling.h" #include "paddle/fluid/platform/cuda_primitives.h" #include "paddle/fluid/platform/gpu_launch_param_config.h" -#include "paddle/fluid/platform/macros.h" namespace paddle { namespace operators { @@ -100,7 +99,7 @@ __global__ void SegmentOpsKernel(const Index* segment_ids, const T* input, CUDA_KERNEL_LOOP(stripe_index, h.total_stripe_count) { Index segment_offset, dim_index_base, actual_height; Index inner_dim_size = h.inner_dim_size; - h.calculate(stripe_index, segment_offset, dim_index_base, actual_height); + h.calculate(stripe_index, &segment_offset, &dim_index_base, &actual_height); T minmax = pool.initial(); Index first_segment_id = segment_ids[dim_index_base]; @@ -154,7 +153,7 @@ __global__ void SegmentIndexGradKernel(const Index* segment_ids, const T* input, T* in_grad, Helper h) { CUDA_KERNEL_LOOP(stripe_index, h.total_stripe_count) { Index segment_offset, dim_index_base, actual_height; - h.calculate(stripe_index, segment_offset, dim_index_base, actual_height); + h.calculate(stripe_index, &segment_offset, &dim_index_base, &actual_height); for (Index j = 0; j < actual_height; j++) { Index current_segment_id = segment_ids[dim_index_base + j]; @@ -217,11 +216,11 @@ class 
ArrangeHelper { total_stripe_count = inner_dim_size * input_outer_dim_num_stripe; } - DEVICE inline void calculate(T stripe_index, T& segment_offset, - T& dim_index_base, T& actual_height) { - segment_offset = stripe_index % inner_dim_size; - dim_index_base = stripe_index / inner_dim_size * DimTileSize; - actual_height = min(DimTileSize, input_length_size - dim_index_base); + DEVICE inline void calculate(T stripe_index, T* segment_offset, + T* dim_index_base, T* actual_height) { + *segment_offset = stripe_index % inner_dim_size; + *dim_index_base = stripe_index / inner_dim_size * DimTileSize; + *actual_height = min(DimTileSize, input_length_size - *dim_index_base); } }; diff --git a/paddle/fluid/platform/cuda_primitives.h b/paddle/fluid/platform/cuda_primitives.h index a5dd19d4363..4d9673e9646 100644 --- a/paddle/fluid/platform/cuda_primitives.h +++ b/paddle/fluid/platform/cuda_primitives.h @@ -137,12 +137,12 @@ USE_CUDA_ATOMIC(Max, unsigned int); #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 USE_CUDA_ATOMIC(Max, unsigned long long int); // NOLINT #else -CUDA_ATOMIC_WRAPPER(Max, unsigned long long int) { +CUDA_ATOMIC_WRAPPER(Max, unsigned long long int) { // NOLINT if (*address >= val) { return; } - unsigned long long int old = *address, assumed; + unsigned long long int old = *address, assumed; // NOLINT do { assumed = old; @@ -169,7 +169,7 @@ CUDA_ATOMIC_WRAPPER(Max, float) { return; } - int *const address_as_i = (int *)address; + int *const address_as_i = reinterpret_cast(address); int old = *address_as_i, assumed; do { @@ -187,9 +187,9 @@ CUDA_ATOMIC_WRAPPER(Max, double) { return; } - unsigned long long int *const address_as_ull = - (unsigned long long int *)address; - unsigned long long int old = *address_as_ull, assumed; + unsigned long long int *const address_as_ull = // NOLINT + reinterpret_cast(address); // NOLINT + unsigned long long int old = *address_as_ull, assumed; // NOLINT do { assumed = old; @@ -209,12 +209,12 @@ USE_CUDA_ATOMIC(Min, unsigned 
int); #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 USE_CUDA_ATOMIC(Min, unsigned long long int); // NOLINT #else -CUDA_ATOMIC_WRAPPER(Min, unsigned long long int) { +CUDA_ATOMIC_WRAPPER(Min, unsigned long long int) { // NOLINT if (*address <= val) { return; } - unsigned long long int old = *address, assumed; + unsigned long long int old = *address, assumed; // NOLINT do { assumed = old; @@ -241,7 +241,7 @@ CUDA_ATOMIC_WRAPPER(Min, float) { return; } - int *const address_as_i = (int *)address; + int *const address_as_i = reinterpret_cast(address); int old = *address_as_i, assumed; do { @@ -259,9 +259,9 @@ CUDA_ATOMIC_WRAPPER(Min, double) { return; } - unsigned long long int *const address_as_ull = - (unsigned long long int *)address; - unsigned long long int old = *address_as_ull, assumed; + unsigned long long int *const address_as_ull = // NOLINT + reinterpret_cast(address); // NOLINT + unsigned long long int old = *address_as_ull, assumed; // NOLINT do { assumed = old; -- GitLab From 86fa0432050831c562c5a170f3db8fb0477aeda8 Mon Sep 17 00:00:00 2001 From: gongweibao Date: Sun, 27 Sep 2020 10:27:14 +0800 Subject: [PATCH 108/117] init test=develop (#27554) --- .../fluid/tests/unittests/CMakeLists.txt | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 09797576801..97a3ebc2135 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -531,15 +531,15 @@ if(NOT WIN32) endif() if(NOT APPLE AND NOT WIN32) - bash_test_modules(test_auto_checkpoint START_BASH dist_test.sh TIMEOUT 140) - bash_test_modules(test_auto_checkpoint1 START_BASH dist_test.sh TIMEOUT 140) - bash_test_modules(test_auto_checkpoint2 START_BASH dist_test.sh TIMEOUT 140) - bash_test_modules(test_auto_checkpoint3 START_BASH dist_test.sh TIMEOUT 140) - 
bash_test_modules(test_auto_checkpoint_multiple START_BASH dist_test.sh TIMEOUT 140) - bash_test_modules(test_auto_checkpoint_dist_basic START_BASH dist_test.sh TIMEOUT 140) - bash_test_modules(test_hdfs1 START_BASH dist_test.sh TIMEOUT 140) - bash_test_modules(test_hdfs2 START_BASH dist_test.sh TIMEOUT 140) - bash_test_modules(test_hdfs3 START_BASH dist_test.sh TIMEOUT 140) + bash_test_modules(test_auto_checkpoint START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") + bash_test_modules(test_auto_checkpoint1 START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") + bash_test_modules(test_auto_checkpoint2 START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") + bash_test_modules(test_auto_checkpoint3 START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") + bash_test_modules(test_auto_checkpoint_multiple START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") + bash_test_modules(test_auto_checkpoint_dist_basic START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") + bash_test_modules(test_hdfs1 START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") + bash_test_modules(test_hdfs2 START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") + bash_test_modules(test_hdfs3 START_BASH dist_test.sh TIMEOUT 140 LABELS "RUN_TYPE=EXCLUSIVE:NIGHTLY") endif() add_subdirectory(sequence) -- GitLab From b9d739a7eaf3d75002d5eb233b75b7aa8affd1f1 Mon Sep 17 00:00:00 2001 From: Double_V Date: Sun, 27 Sep 2020 10:32:50 +0800 Subject: [PATCH 109/117] fix pool bug, test=develop (#27537) * fix pool bug, test=develop * fix coverage,test=develop * fix bug, test=develop --- .../fluid/tests/unittests/test_pool1d_api.py | 18 +++++++++ python/paddle/nn/functional/pooling.py | 39 ++++++++++++------- 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_pool1d_api.py 
b/python/paddle/fluid/tests/unittests/test_pool1d_api.py index 25216175d59..c1169dfc521 100644 --- a/python/paddle/fluid/tests/unittests/test_pool1d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool1d_api.py @@ -195,6 +195,23 @@ class TestPool1d_API(unittest.TestCase): result = max_pool1d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) + def check_max_dygraph_return_index_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result, index = F.max_pool1d( + input, kernel_size=2, stride=2, padding=0, return_indices=True) + + result_np = max_pool1D_forward_naive( + input_np, ksize=[2], strides=[2], paddings=[0]) + + self.assertTrue(np.allclose(result.numpy(), result_np)) + + max_pool1d_dg = paddle.nn.layer.MaxPool1d( + kernel_size=2, stride=None, padding=0) + result = max_pool1d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + def check_max_dygraph_padding_same(self, place): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32]).astype("float32") @@ -228,6 +245,7 @@ class TestPool1d_API(unittest.TestCase): self.check_avg_static_results(place) self.check_max_dygraph_padding_same(place) self.check_avg_dygraph_padding_same(place) + self.check_max_dygraph_return_index_results(place) class TestPool2dError_API(unittest.TestCase): diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index 1eb9167d035..bed5df8fa78 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -571,15 +571,26 @@ def max_pool1d(x, padding = _expand_low_nd_padding(padding) if in_dygraph_mode(): - pool_out = core.ops.max_pool2d_with_index( - x, 'ksize', kernel_size, 'global_pooling', False, 'strides', stride, - 'paddings', padding, 'padding_algorithm', padding_algorithm, - 'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_mkldnn', False, - 
'exclusive', True, 'data_format', data_format) - return (squeeze(pool_out[0], [2]), squeeze( - pool_out[1], [2])) if return_indices else squeeze(pool_out[0], [2]) + if return_indices: + pool_out = core.ops.max_pool2d_with_index( + x, 'ksize', kernel_size, 'global_pooling', False, 'strides', + stride, 'paddings', padding, 'padding_algorithm', + padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, + 'use_mkldnn', False, 'exclusive', True, 'data_format', + data_format) + return (squeeze(pool_out[0], [2]), squeeze( + pool_out[1], + [2])) if return_indices else squeeze(pool_out[0], [2]) + else: + pool_out = core.ops.pool2d( + x, 'pooling_type', 'max', 'ksize', kernel_size, + 'global_pooling', False, 'padding_algorithm', padding_algorithm, + 'strides', stride, 'paddings', padding, 'use_cudnn', True, + 'ceil_mode', ceil_mode, 'use_mkldnn', False, 'exclusive', True, + 'data_format', data_format) + return squeeze(pool_out, [2]) - op_type = 'max_pool2d_with_index' + op_type = 'max_pool2d_with_index' if return_indices else "pool2d" helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) @@ -696,7 +707,7 @@ def max_pool2d(x, ) if in_dygraph_mode(): - if data_format == "NCHW": + if return_indices: output = core.ops.max_pool2d_with_index( x, 'ksize', kernel_size, 'global_pooling', False, 'strides', stride, 'paddings', padding, 'padding_algorithm', @@ -704,7 +715,7 @@ def max_pool2d(x, 'use_mkldnn', False, 'exclusive', True, 'data_format', data_format) return output if return_indices else output[0] - elif data_format == "NHWC" and not return_indices: + else: output = core.ops.pool2d( x, 'pooling_type', 'max', 'ksize', kernel_size, 'global_pooling', False, 'padding_algorithm', padding_algorithm, @@ -713,7 +724,7 @@ def max_pool2d(x, 'data_format', data_format) return output - op_type = 'max_pool2d_with_index' if data_format == "NCHW" else "pool2d" + op_type = 'max_pool2d_with_index' if 
return_indices else "pool2d" helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) @@ -822,7 +833,7 @@ def max_pool3d(x, ) if in_dygraph_mode(): - if data_format == "NCDHW": + if return_indices: output = core.ops.max_pool3d_with_index( x, 'pooling_type', 'max', 'ksize', kernel_size, 'strides', stride, 'paddings', padding, 'global_pooling', False, @@ -830,7 +841,7 @@ def max_pool3d(x, 'ceil_mode', ceil_mode, 'use_mkldnn', False, 'exclusive', True, 'data_format', data_format) return output if return_indices else output[0] - elif data_format == "NDHWC" and not return_indices: + else: output = core.ops.pool3d( x, 'pooling_type', 'max', 'ksize', kernel_size, 'global_pooling', False, 'padding_algorithm', padding_algorithm, @@ -839,7 +850,7 @@ def max_pool3d(x, 'data_format', data_format) return output - op_type = "max_pool3d_with_index" if data_format == "NCDHW" else "pool3d" + op_type = "max_pool3d_with_index" if return_indices else "pool3d" helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) -- GitLab From 42065ba37af9121188a831848234a7dd879d0d2c Mon Sep 17 00:00:00 2001 From: Double_V Date: Sun, 27 Sep 2020 10:33:33 +0800 Subject: [PATCH 110/117] fix activate_nn_grad, test=develop (#27555) --- .../paddle/fluid/tests/unittests/test_activation_nn_grad.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py index e8b8a45fb67..c97cca654a7 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py @@ -78,15 +78,17 @@ class TestLeakyReluDoubleGradCheck(unittest.TestCase): class TestELUDoubleGradCheck(unittest.TestCase): @prog_scope() def func(self, place): - shape = [2, 3, 7, 9] + shape = [2, 3, 
6, 6] eps = 1e-6 alpha = 1.1 dtype = np.float64 + SEED = 0 x = layers.data('x', shape, False, dtype) x.persistable = True y = layers.elu(x, alpha=alpha) + np.random.RandomState(SEED) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) gradient_checker.double_grad_check( [x], y, x_init=x_arr, place=place, eps=eps) -- GitLab From 0e101c4f6fd4f2d038d725ecae5729d904ef694e Mon Sep 17 00:00:00 2001 From: Chengmo Date: Sun, 27 Sep 2020 10:36:15 +0800 Subject: [PATCH 111/117] Fix test dist fleet heter ctr (#27513) * fix test_dist_fleet_heter_ctr & peformance update --- .../framework/distributed_strategy.proto | 1 + .../operators/distributed/parameter_recv.cc | 13 ++++---- .../distributed/fleet/base/role_maker.py | 8 ++--- .../fleet/runtime/parameter_server_runtime.py | 18 +++++++++-- .../tests/unittests/ctr_dataset_reader.py | 2 +- .../tests/unittests/dist_fleet_heter_ctr.py | 7 ---- .../tests/unittests/test_communicator_geo.py | 1 + .../tests/unittests/test_communicator_sync.py | 1 + .../test_dist_fleet_a_sync_optimizer_async.py | 2 ++ .../test_dist_fleet_a_sync_optimizer_sync.py | 1 + .../tests/unittests/test_dist_fleet_base.py | 5 ++- .../unittests/test_dist_fleet_heter_base.py | 31 ++++-------------- .../unittests/test_dist_fleet_heter_ctr.py | 32 ------------------- 13 files changed, 42 insertions(+), 80 deletions(-) diff --git a/paddle/fluid/framework/distributed_strategy.proto b/paddle/fluid/framework/distributed_strategy.proto index c9ae5a67950..21e28d7ac86 100644 --- a/paddle/fluid/framework/distributed_strategy.proto +++ b/paddle/fluid/framework/distributed_strategy.proto @@ -97,6 +97,7 @@ message AsyncConfig { optional int32 thread_pool_size = 6 [ default = 1 ]; optional int32 send_wait_times = 7 [ default = 1 ]; optional bool runtime_split_send_recv = 8 [ default = false ]; + optional bool launch_barrier = 9 [ default = true ]; } message PipelineConfig { optional int32 micro_batch = 1 [ default = 1 ]; } diff --git 
a/paddle/fluid/operators/distributed/parameter_recv.cc b/paddle/fluid/operators/distributed/parameter_recv.cc index a91df5b3c47..51b13bc2c56 100644 --- a/paddle/fluid/operators/distributed/parameter_recv.cc +++ b/paddle/fluid/operators/distributed/parameter_recv.cc @@ -175,10 +175,6 @@ void RecvGeoSparseRecords(const CommContext &rpc_ctx, template void RecvLodTensor(const CommContext &rpc_ctx, const framework::Scope &scope) { - platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); - auto cpu_place = platform::CPUPlace(); - auto &cpu_ctx = *pool.Get(cpu_place); - distributed::RPCClient *rpc_client = distributed::RPCClient::GetInstance(rpc_ctx.trainer_id); @@ -188,8 +184,13 @@ void RecvLodTensor(const CommContext &rpc_ctx, const framework::Scope &scope) { if (rpc_ctx.origin_varnames.size() == 1 && rpc_ctx.splited_varnames.size() == 1) { auto varname = rpc_ctx.origin_varnames[0]; - VLOG(4) << "recv " << varname << " from " << rpc_ctx.epmap[0]; - rets.push_back(rpc_client->AsyncGetVarNoBarrier(rpc_ctx.epmap[0], cpu_ctx, + const auto place = + scope.FindVar(varname)->Get().place(); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &ctx = *pool.Get(place); + VLOG(4) << "recv " << varname << " from " << rpc_ctx.epmap[0] << " in gpu? 
" + << platform::is_gpu_place(place); + rets.push_back(rpc_client->AsyncGetVarNoBarrier(rpc_ctx.epmap[0], ctx, scope, varname, varname)); for (size_t i = 0; i < rets.size(); i++) { diff --git a/python/paddle/distributed/fleet/base/role_maker.py b/python/paddle/distributed/fleet/base/role_maker.py index f66f013e4db..36da7264efe 100644 --- a/python/paddle/distributed/fleet/base/role_maker.py +++ b/python/paddle/distributed/fleet/base/role_maker.py @@ -495,7 +495,7 @@ class RoleMakerBase(object): Returns: string: all heter_trainers'endpoints """ - assert self._heter_trainer_endpoints != [] + assert self._heter_trainer_endpoints != [], "Heter Worker Endpoints Not initialized" return self._heter_trainer_endpoints def _get_heter_worker_endpoint(self): @@ -505,10 +505,10 @@ class RoleMakerBase(object): e.g: if we have 4 cpu-trainer(default), 2 gpu-trainer(heter) then No.0 and No.2 cpu-trainer will work with No.0 gpu-trainer - and No.1 and No.3 cpu-trainer will work with No.1 gpu-trainerr + and No.1 and No.3 cpu-trainer will work with No.1 gpu-trainer """ - assert self._heter_trainer_endpoints != [] - return self._heter_trainer_endpoints[(self._current_id + 1) % + assert self._heter_trainer_endpoints != [], "Heter Worker Endpoints Not initialized" + return self._heter_trainer_endpoints[(self._current_id) % self._heter_worker_num()] def _get_heter_worker_device(self): diff --git a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py index 6dd4661f000..42be7e869d9 100644 --- a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py +++ b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py @@ -23,6 +23,7 @@ from paddle.fluid.executor import Executor from paddle.fluid.parallel_executor import ParallelExecutor from .runtime_base import RuntimeBase +from ..base.private_helper_function import wait_server_ready class ParameterServerRuntime(RuntimeBase): @@ -94,8 +95,8 
@@ class ParameterServerRuntime(RuntimeBase): return False if var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \ - var.desc.type() == core.VarDesc.VarType.FETCH_LIST or \ - var.desc.type() == core.VarDesc.VarType.READER: + var.desc.type() == core.VarDesc.VarType.FETCH_LIST or \ + var.desc.type() == core.VarDesc.VarType.READER: return False return var.persistable @@ -161,6 +162,17 @@ class ParameterServerRuntime(RuntimeBase): trainer_config = self.async_strategy.get_trainer_runtime_config() + dist_strategy = self.context["valid_strategy"] + launch_barrier = dist_strategy.a_sync_configs["launch_barrier"] + if launch_barrier: + # for trainer wait server ready + wait_server_ready(self.role_maker._get_pserver_endpoints()) + + # for ps-heter mode, wait heter worker ready + if self.role_maker._is_heter_parameter_server_mode and self.role_maker._is_worker( + ): + wait_server_ready(self.role_maker._get_heter_worker_endpoints()) + lrs = _has_global_step(_get_lr_ops(self.origin_main_program)) if lrs: @@ -312,7 +324,7 @@ class ParameterServerRuntime(RuntimeBase): opts = _get_optimize_ops(self.origin_main_program) for op in opts: if "Param" in op.input_names and \ - "LearningRate" in op.input_names and op.input("Param")[0] == param_name: + "LearningRate" in op.input_names and op.input("Param")[0] == param_name: return op def _save_dense_params(self, executor, dirname, context, main_program): diff --git a/python/paddle/fluid/tests/unittests/ctr_dataset_reader.py b/python/paddle/fluid/tests/unittests/ctr_dataset_reader.py index 15e98481c26..92d84b8b3f3 100644 --- a/python/paddle/fluid/tests/unittests/ctr_dataset_reader.py +++ b/python/paddle/fluid/tests/unittests/ctr_dataset_reader.py @@ -153,7 +153,7 @@ def gen_fake_line(dnn_data_num=7, return line -def prepare_fake_data(file_nums=9, file_lines=1000): +def prepare_fake_data(file_nums=6, file_lines=1000): """ Create fake data with same type as avazu_ctr_data """ diff --git 
a/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py index f62ad66e462..fefaecd3b89 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py @@ -206,13 +206,6 @@ class TestHeterPsCTR2x2(FleetDistHeterRunnerBase): debug=int(os.getenv("Debug", "0"))) pass_time = time.time() - pass_start print("do_dataset_training done. using time {}".format(pass_time)) - if os.getenv("SAVE_MODEL") == "1": - model_dir = tempfile.mkdtemp() - fleet.save_inference_model(exe, model_dir, - [feed.name for feed in self.feeds], - self.avg_cost) - self.check_model_right(model_dir) - shutil.rmtree(model_dir) fleet.stop_worker() print("do_dataset_training stop worker.") diff --git a/python/paddle/fluid/tests/unittests/test_communicator_geo.py b/python/paddle/fluid/tests/unittests/test_communicator_geo.py index 5916000fba7..f625e1de4a3 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_geo.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_geo.py @@ -113,6 +113,7 @@ class TestCommunicatorGeoEnd2End(unittest.TestCase): strategy = paddle.distributed.fleet.DistributedStrategy() strategy.a_sync = True strategy.a_sync_configs = {"k_steps": 100} + strategy.a_sync_configs = {"launch_barrier": False} if training_role == "TRAINER": self.run_trainer(role, strategy) diff --git a/python/paddle/fluid/tests/unittests/test_communicator_sync.py b/python/paddle/fluid/tests/unittests/test_communicator_sync.py index 95b209b1460..78e2050d3b4 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_sync.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_sync.py @@ -51,6 +51,7 @@ class TestCommunicator(unittest.TestCase): strategy = paddle.distributed.fleet.DistributedStrategy() strategy.a_sync = False + strategy.a_sync_configs = {"launch_barrier": False} optimizer = fleet.distributed_optimizer(optimizer, strategy) 
optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py index 7f55e956a94..845be6eda6e 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_async.py @@ -52,6 +52,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): strategy = paddle.distributed.fleet.DistributedStrategy() strategy.a_sync = True + strategy.a_sync_configs = {"launch_barrier": False} optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) @@ -92,6 +93,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): strategy = paddle.distributed.fleet.DistributedStrategy() strategy.a_sync = True + strategy.a_sync_configs = {"launch_barrier": False} optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py index db3f2afb366..668b4ad872f 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_sync.py @@ -44,6 +44,7 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): strategy = paddle.distributed.fleet.DistributedStrategy() strategy.a_sync = False + strategy.a_sync_configs = {"launch_barrier": False} optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py 
b/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py index c46d1dc5b0f..195b3f8de0a 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_base.py @@ -312,9 +312,6 @@ class TestFleetBase(unittest.TestCase): "========================Error tr1_err end===========================" ) - self.assertEqual(tr0_ret, 0, "something wrong in tr0, please check") - self.assertEqual(tr1_ret, 0, "something wrong in tr1, please check") - # close trainer file tr0_pipe.close() tr1_pipe.close() @@ -325,6 +322,8 @@ class TestFleetBase(unittest.TestCase): ps1.terminate() shutil.rmtree(gloo_path) + self.assertEqual(tr0_ret, 0, "something wrong in tr0, please check") + self.assertEqual(tr1_ret, 0, "something wrong in tr1, please check") return 0, 0 def check_with_place(self, diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py index ba97c5079bd..6c5a1d6e36c 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_base.py @@ -81,7 +81,7 @@ class FleetDistHeterRunnerBase(object): def build_strategy(self, args): self.strategy = paddle.distributed.fleet.DistributedStrategy() self.strategy.a_sync = True - + self.strategy.a_sync_configs = {"launch_barrier": True} return self.strategy def build_optimizer(self, avg_cost, strategy): @@ -237,7 +237,10 @@ class TestFleetHeterBase(unittest.TestCase): return heter0_proc, heter1_proc, heter0_pipe, heter1_pipe def _run_cluster(self, model, envs): - env = {'GRAD_CLIP': str(self._grad_clip_mode)} + env = { + 'GRAD_CLIP': str(self._grad_clip_mode), + 'FLAGS_eager_delete_tensor_gb': str(-1) + } python_path = self._python_interp gloo_path = tempfile.mkdtemp() @@ -286,27 +289,6 @@ class TestFleetHeterBase(unittest.TestCase): tr0_ret = tr0.returncode tr1_ret = tr0.returncode - print("tr get 
returncode: {}".format(tr0_ret)) - if tr0_ret != 0: - print( - "========================Error tr0_err begin===========================" - ) - os.system("cat {}".format(tempfile.gettempdir() + "/tr0_err.log")) - print( - "========================Error tr0_err end===========================" - ) - - if tr1_ret != 0: - print( - "========================Error tr1_err begin===========================" - ) - os.system("cat {}".format(tempfile.gettempdir() + "/tr1_err.log")) - print( - "========================Error tr1_err end===========================" - ) - - self.assertEqual(tr0_ret, 0, "something wrong in tr0, please check") - self.assertEqual(tr1_ret, 0, "something wrong in tr1, please check") # close trainer file tr0_pipe.close() @@ -320,7 +302,8 @@ class TestFleetHeterBase(unittest.TestCase): ps1.terminate() heter0.terminate() heter1.terminate() - + self.assertEqual(tr0_ret, 0, "something wrong in tr0, please check") + self.assertEqual(tr1_ret, 0, "something wrong in tr1, please check") shutil.rmtree(gloo_path) return 0, 0 diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py index b3e38a42128..5f7d7b21d7f 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py @@ -23,38 +23,6 @@ import paddle paddle.enable_static() -class TestDistHeterDatasetAsync2x2(TestFleetHeterBase): - def _setup_config(self): - self._mode = "async" - self._reader = "dataset" - - def check_with_place(self, - model_file, - delta=1e-3, - check_error_log=False, - need_envs={}): - required_envs = { - "PATH": os.getenv("PATH", ""), - "PYTHONPATH": os.getenv("PYTHONPATH", ""), - "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""), - "FLAGS_rpc_deadline": "5000", # 5sec to fail fast - "http_proxy": "", - "CPU_NUM": "3" - } - - required_envs.update(need_envs) - - if check_error_log: - required_envs["GLOG_v"] = "3" - 
required_envs["GLOG_logtostderr"] = "1" - - tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) - - def test_dist_train(self): - self.check_with_place( - "dist_fleet_heter_ctr.py", delta=1e-5, check_error_log=True) - - class TestDistHeterPyreaderAsync2x2(TestFleetHeterBase): def _setup_config(self): self._mode = "async" -- GitLab From 9b124014343cf07a3a2c88006a66f5b3de6af8aa Mon Sep 17 00:00:00 2001 From: YUNSHEN XIE <1084314248@qq.com> Date: Sun, 27 Sep 2020 10:47:04 +0800 Subject: [PATCH 112/117] modified storage address of block file (#27576) --- tools/check_file_diff_approvals.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/check_file_diff_approvals.sh b/tools/check_file_diff_approvals.sh index 84254cc89bb..16e61d7c77a 100644 --- a/tools/check_file_diff_approvals.sh +++ b/tools/check_file_diff_approvals.sh @@ -286,7 +286,7 @@ fi # Get the list of PR authors with unresolved unit test issues pip install PyGithub # For getting PR related data -wget https://paddle-ci.gz.bcebos.com/blk/block.txt --no-check-certificate +wget https://sys-p0.bj.bcebos.com/blk/block.txt --no-check-certificate wget https://sys-p0.bj.bcebos.com/bk-ci/bk.txt --no-check-certificate HASUTFIXED=`python ${PADDLE_ROOT}/tools/check_ut.py | grep "has unit-test to be fixed" || true` if [ "${HASUTFIXED}" != "" ]; then -- GitLab From 162b4d6c13f6f38a234423bc984fb41710796475 Mon Sep 17 00:00:00 2001 From: Zhou Wei <52485244+zhouwei25@users.noreply.github.com> Date: Sun, 27 Sep 2020 11:47:36 +0800 Subject: [PATCH 113/117] remove to_variable from 2.0 (#27528) --- python/paddle/__init__.py | 1 - python/paddle/fluid/dygraph/nn.py | 7 +--- .../tests/unittests/test_activation_op.py | 8 ++-- .../fluid/tests/unittests/test_adamax_api.py | 2 +- .../fluid/tests/unittests/test_adamw_op.py | 4 +- .../unittests/test_adaptive_avg_pool2d.py | 4 +- .../unittests/test_adaptive_avg_pool3d.py | 4 +- .../unittests/test_adaptive_max_pool2d.py | 4 +- 
.../unittests/test_adaptive_max_pool3d.py | 4 +- .../fluid/tests/unittests/test_addmm_op.py | 6 +-- .../fluid/tests/unittests/test_arange.py | 6 +-- .../fluid/tests/unittests/test_cholesky_op.py | 2 +- .../fluid/tests/unittests/test_clip_op.py | 6 +-- .../fluid/tests/unittests/test_concat_op.py | 6 +-- .../unittests/test_cosine_similarity_api.py | 16 ++++---- .../fluid/tests/unittests/test_cumsum_op.py | 3 +- .../tests/unittests/test_default_dtype.py | 1 - .../unittests/test_directory_migration.py | 2 +- .../test_flatten_contiguous_range_op.py | 2 +- .../tests/unittests/test_imperative_basic.py | 4 +- .../test_imperative_selected_rows.py | 2 +- .../tests/unittests/test_isfinite_v2_op.py | 2 +- .../tests/unittests/test_jit_save_load.py | 14 +++---- .../tests/unittests/test_kldiv_loss_op.py | 2 +- .../fluid/tests/unittests/test_l1_loss.py | 8 ++-- .../fluid/tests/unittests/test_log_softmax.py | 4 +- .../fluid/tests/unittests/test_logsumexp.py | 4 +- .../fluid/tests/unittests/test_max_op.py | 2 +- .../fluid/tests/unittests/test_maximum_op.py | 8 ++-- .../fluid/tests/unittests/test_mean_op.py | 2 +- .../fluid/tests/unittests/test_min_op.py | 2 +- .../fluid/tests/unittests/test_randn_op.py | 2 +- .../tests/unittests/test_retain_graph.py | 4 +- .../tests/unittests/test_transformer_api.py | 40 +++++++++---------- .../tests/unittests/test_zeros_like_op.py | 2 +- python/paddle/tensor/linalg.py | 18 +++------ python/paddle/tensor/math.py | 3 +- tools/wlist.json | 5 ++- 38 files changed, 102 insertions(+), 114 deletions(-) diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index e749cf88b6a..40275a2ce71 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -230,7 +230,6 @@ from .framework import CPUPlace #DEFINE_ALIAS from .framework import CUDAPlace #DEFINE_ALIAS from .framework import CUDAPinnedPlace #DEFINE_ALIAS -from .framework import to_variable #DEFINE_ALIAS from .framework import grad #DEFINE_ALIAS from .framework import no_grad 
#DEFINE_ALIAS from .framework import save #DEFINE_ALIAS diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py index a14c3a81c12..05269028acc 100644 --- a/python/paddle/fluid/dygraph/nn.py +++ b/python/paddle/fluid/dygraph/nn.py @@ -3230,14 +3230,11 @@ class Flatten(layers.Layer): .. code-block:: python import paddle - from paddle import to_variable import numpy as np + paddle.disable_static() inp_np = np.ones([5, 2, 3, 4]).astype('float32') - - paddle.disable_static() - - inp_np = to_variable(inp_np) + inp_np = paddle.to_tensor(inp_np) flatten = paddle.nn.Flatten(start_axis=1, stop_axis=2) flatten_res = flatten(inp_np) diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py index 791f1ee2dfa..ad7539e76e4 100755 --- a/python/paddle/fluid/tests/unittests/test_activation_op.py +++ b/python/paddle/fluid/tests/unittests/test_activation_op.py @@ -228,7 +228,7 @@ class TestTanhAPI(unittest.TestCase): def test_dygraph_api(self): paddle.disable_static(self.place) - x = paddle.to_variable(self.x_np) + x = paddle.to_tensor(self.x_np) out1 = F.tanh(x) out2 = paddle.tanh(x) th = paddle.nn.Tanh() @@ -573,7 +573,7 @@ class TestHardShrinkAPI(unittest.TestCase): def test_dygraph_api(self): paddle.disable_static(self.place) - x = paddle.to_variable(self.x_np) + x = paddle.to_tensor(self.x_np) out1 = F.hardshrink(x) hd = paddle.nn.Hardshrink() out2 = hd(x) @@ -639,7 +639,7 @@ class TestHardtanhAPI(unittest.TestCase): def test_dygraph_api(self): paddle.disable_static(self.place) - x = paddle.to_variable(self.x_np) + x = paddle.to_tensor(self.x_np) out1 = F.hardtanh(x) m = paddle.nn.Hardtanh() out2 = m(x) @@ -1063,7 +1063,7 @@ class TestLeakyReluAPI(unittest.TestCase): def test_dygraph_api(self): paddle.disable_static(self.place) - x = paddle.to_variable(self.x_np) + x = paddle.to_tensor(self.x_np) out1 = F.leaky_relu(x) m = paddle.nn.LeakyReLU() out2 = m(x) diff --git 
a/python/paddle/fluid/tests/unittests/test_adamax_api.py b/python/paddle/fluid/tests/unittests/test_adamax_api.py index 5a33e11d286..6d2ec0eefbb 100644 --- a/python/paddle/fluid/tests/unittests/test_adamax_api.py +++ b/python/paddle/fluid/tests/unittests/test_adamax_api.py @@ -25,7 +25,7 @@ class TestAdamaxAPI(unittest.TestCase): def test_adamax_api_dygraph(self): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype("float32") - a = paddle.to_variable(value) + a = paddle.to_tensor(value) linear = paddle.nn.Linear(13, 5) adam = paddle.optimizer.Adamax( learning_rate=0.01, diff --git a/python/paddle/fluid/tests/unittests/test_adamw_op.py b/python/paddle/fluid/tests/unittests/test_adamw_op.py index cce24b57d2c..b799508f6b8 100644 --- a/python/paddle/fluid/tests/unittests/test_adamw_op.py +++ b/python/paddle/fluid/tests/unittests/test_adamw_op.py @@ -22,7 +22,7 @@ class TestAdamWOp(unittest.TestCase): def test_adamw_op_dygraph(self): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype("float32") - a = paddle.to_variable(value) + a = paddle.to_tensor(value) linear = paddle.nn.Linear(13, 5) adam = paddle.optimizer.AdamW( learning_rate=0.01, @@ -37,7 +37,7 @@ class TestAdamWOp(unittest.TestCase): def test_adamw_op_coverage(self): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype("float32") - a = paddle.to_variable(value) + a = paddle.to_tensor(value) linear = paddle.nn.Linear(13, 5) adam = paddle.optimizer.AdamW( learning_rate=0.0, diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool2d.py b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool2d.py index e3c70884ebc..b8c5bd29491 100644 --- a/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool2d.py +++ b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool2d.py @@ -147,7 +147,7 @@ class TestAdaptiveAvgPool2dAPI(unittest.TestCase): if core.is_compiled_with_cuda() else [False]): place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() 
paddle.disable_static(place=place) - x = paddle.to_variable(self.x_np) + x = paddle.to_tensor(self.x_np) out_1 = paddle.nn.functional.adaptive_avg_pool2d( x=x, output_size=[3, 3]) @@ -245,7 +245,7 @@ class TestAdaptiveAvgPool2dClassAPI(unittest.TestCase): if core.is_compiled_with_cuda() else [False]): place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() paddle.disable_static(place=place) - x = paddle.to_variable(self.x_np) + x = paddle.to_tensor(self.x_np) adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=[3, 3]) out_1 = adaptive_avg_pool(x=x) diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool3d.py b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool3d.py index a3c9dd91a69..bb36aaebf08 100755 --- a/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool3d.py +++ b/python/paddle/fluid/tests/unittests/test_adaptive_avg_pool3d.py @@ -162,7 +162,7 @@ class TestAdaptiveAvgPool3dAPI(unittest.TestCase): if core.is_compiled_with_cuda() else [False]): place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() paddle.disable_static(place=place) - x = paddle.to_variable(self.x_np) + x = paddle.to_tensor(self.x_np) out_1 = paddle.nn.functional.adaptive_avg_pool3d( x=x, output_size=[3, 3, 3]) @@ -262,7 +262,7 @@ class TestAdaptiveAvgPool3dClassAPI(unittest.TestCase): if core.is_compiled_with_cuda() else [False]): place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() paddle.disable_static(place=place) - x = paddle.to_variable(self.x_np) + x = paddle.to_tensor(self.x_np) adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d( output_size=[3, 3, 3]) diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py index d78788eb1e7..dfa6f3226c8 100644 --- a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py +++ b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool2d.py @@ -147,7 +147,7 @@ class 
TestAdaptiveMaxPool2dAPI(unittest.TestCase): if core.is_compiled_with_cuda() else [False]): place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() paddle.disable_static(place=place) - x = paddle.to_variable(self.x_np) + x = paddle.to_tensor(self.x_np) out_1 = paddle.nn.functional.adaptive_max_pool2d( x=x, return_indices=False, output_size=[3, 3]) @@ -240,7 +240,7 @@ class TestAdaptiveMaxPool2dClassAPI(unittest.TestCase): if core.is_compiled_with_cuda() else [False]): place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() paddle.disable_static(place=place) - x = paddle.to_variable(self.x_np) + x = paddle.to_tensor(self.x_np) adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=[3, 3]) out_1 = adaptive_max_pool(x=x) diff --git a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py index a7de0a5c6a7..1fa703688cd 100755 --- a/python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py +++ b/python/paddle/fluid/tests/unittests/test_adaptive_max_pool3d.py @@ -162,7 +162,7 @@ class TestAdaptiveMaxPool3dAPI(unittest.TestCase): if core.is_compiled_with_cuda() else [False]): place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() paddle.disable_static(place=place) - x = paddle.to_variable(self.x_np) + x = paddle.to_tensor(self.x_np) out_1 = paddle.nn.functional.adaptive_max_pool3d( x=x, output_size=[3, 3, 3]) @@ -257,7 +257,7 @@ class TestAdaptiveMaxPool3dClassAPI(unittest.TestCase): if core.is_compiled_with_cuda() else [False]): place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() paddle.disable_static(place=place) - x = paddle.to_variable(self.x_np) + x = paddle.to_tensor(self.x_np) adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d( output_size=[3, 3, 3]) diff --git a/python/paddle/fluid/tests/unittests/test_addmm_op.py b/python/paddle/fluid/tests/unittests/test_addmm_op.py index 6e66c0c0029..6238d7dd4a1 100644 --- 
a/python/paddle/fluid/tests/unittests/test_addmm_op.py +++ b/python/paddle/fluid/tests/unittests/test_addmm_op.py @@ -244,9 +244,9 @@ class TestAddMMAPI(unittest.TestCase): def test_error1(): data_x_wrong = np.ones((2, 3)).astype(np.float32) - x = paddle.to_variable(data_x_wrong) - y = paddle.to_variable(data_y) - input = paddle.to_variable(data_input) + x = paddle.to_tensor(data_x_wrong) + y = paddle.to_tensor(data_y) + input = paddle.to_tensor(data_input) out = paddle.tensor.addmm( input=input, x=x, y=y, beta=0.5, alpha=5.0 ) self.assertRaises(ValueError, test_error1) ''' diff --git a/python/paddle/fluid/tests/unittests/test_arange.py b/python/paddle/fluid/tests/unittests/test_arange.py index 29003d28e44..d62c08b072b 100644 --- a/python/paddle/fluid/tests/unittests/test_arange.py +++ b/python/paddle/fluid/tests/unittests/test_arange.py @@ -98,9 +98,9 @@ class TestArangeImperative(unittest.TestCase): x2 = paddle.tensor.arange(5) x3 = paddle.tensor.creation.arange(5) - start = paddle.to_variable(np.array([0], 'float32')) - end = paddle.to_variable(np.array([5], 'float32')) - step = paddle.to_variable(np.array([1], 'float32')) + start = paddle.to_tensor(np.array([0], 'float32')) + end = paddle.to_tensor(np.array([5], 'float32')) + step = paddle.to_tensor(np.array([1], 'float32')) x4 = paddle.arange(start, end, step, 'int64') paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_cholesky_op.py b/python/paddle/fluid/tests/unittests/test_cholesky_op.py index ab08a0aacbf..2fcec657c14 100644 --- a/python/paddle/fluid/tests/unittests/test_cholesky_op.py +++ b/python/paddle/fluid/tests/unittests/test_cholesky_op.py @@ -96,7 +96,7 @@ class TestDygraph(unittest.TestCase): a = np.random.rand(3, 3) a_t = np.transpose(a, [1, 0]) x_data = np.matmul(a, a_t) + 1e-03 - x = paddle.to_variable(x_data) + x = paddle.to_tensor(x_data) out = paddle.cholesky(x, upper=False) diff --git a/python/paddle/fluid/tests/unittests/test_clip_op.py 
b/python/paddle/fluid/tests/unittests/test_clip_op.py index b56d9f6668e..2946798a82f 100644 --- a/python/paddle/fluid/tests/unittests/test_clip_op.py +++ b/python/paddle/fluid/tests/unittests/test_clip_op.py @@ -168,9 +168,9 @@ class TestClipAPI(unittest.TestCase): paddle.disable_static(place) data_shape = [1, 9, 9, 4] data = np.random.random(data_shape).astype('float32') - images = paddle.to_variable(data, dtype='float32') - v_min = paddle.to_variable(np.array([0.2], dtype=np.float32)) - v_max = paddle.to_variable(np.array([0.8], dtype=np.float32)) + images = paddle.to_tensor(data, dtype='float32') + v_min = paddle.to_tensor(np.array([0.2], dtype=np.float32)) + v_max = paddle.to_tensor(np.array([0.8], dtype=np.float32)) out_1 = paddle.clip(images, min=0.2, max=0.8) out_2 = paddle.clip(images, min=0.2, max=0.9) diff --git a/python/paddle/fluid/tests/unittests/test_concat_op.py b/python/paddle/fluid/tests/unittests/test_concat_op.py index b4dbba7eead..14c10e7aa20 100644 --- a/python/paddle/fluid/tests/unittests/test_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_concat_op.py @@ -285,9 +285,9 @@ class TestConcatAPI(unittest.TestCase): in2 = np.array([[11, 12, 13], [14, 15, 16]]) in3 = np.array([[21, 22], [23, 24]]) paddle.disable_static() - x1 = paddle.to_variable(in1) - x2 = paddle.to_variable(in2) - x3 = paddle.to_variable(in3) + x1 = paddle.to_tensor(in1) + x2 = paddle.to_tensor(in2) + x3 = paddle.to_tensor(in3) out1 = fluid.layers.concat(input=[x1, x2, x3], axis=-1) out2 = paddle.concat(x=[x1, x2], axis=0) np_out1 = np.concatenate([in1, in2, in3], axis=-1) diff --git a/python/paddle/fluid/tests/unittests/test_cosine_similarity_api.py b/python/paddle/fluid/tests/unittests/test_cosine_similarity_api.py index 1e25613fa63..a8899d9f022 100644 --- a/python/paddle/fluid/tests/unittests/test_cosine_similarity_api.py +++ b/python/paddle/fluid/tests/unittests/test_cosine_similarity_api.py @@ -75,8 +75,8 @@ class TestCosineSimilarityAPI(unittest.TestCase): 
np_x2 = np.random.rand(*shape).astype(np.float32) np_out = self._get_numpy_out(np_x1, np_x2, axis=axis, eps=eps) - tesnor_x1 = paddle.to_variable(np_x1) - tesnor_x2 = paddle.to_variable(np_x2) + tesnor_x1 = paddle.to_tensor(np_x1) + tesnor_x2 = paddle.to_tensor(np_x2) y = F.cosine_similarity(tesnor_x1, tesnor_x2, axis=axis, eps=eps) self.assertTrue(np.allclose(y.numpy(), np_out)) @@ -92,8 +92,8 @@ class TestCosineSimilarityAPI(unittest.TestCase): np_x2 = np.random.rand(*shape).astype(np.float32) np_out = self._get_numpy_out(np_x1, np_x2, axis=axis, eps=eps) - tesnor_x1 = paddle.to_variable(np_x1) - tesnor_x2 = paddle.to_variable(np_x2) + tesnor_x1 = paddle.to_tensor(np_x1) + tesnor_x2 = paddle.to_tensor(np_x2) y = F.cosine_similarity(tesnor_x1, tesnor_x2, axis=axis, eps=eps) self.assertTrue(np.allclose(y.numpy(), np_out)) @@ -110,8 +110,8 @@ class TestCosineSimilarityAPI(unittest.TestCase): np_x2 = np.random.rand(*shape2).astype(np.float32) np_out = self._get_numpy_out(np_x1, np_x2, axis=axis, eps=eps) - tesnor_x1 = paddle.to_variable(np_x1) - tesnor_x2 = paddle.to_variable(np_x2) + tesnor_x1 = paddle.to_tensor(np_x1) + tesnor_x2 = paddle.to_tensor(np_x2) y = F.cosine_similarity(tesnor_x1, tesnor_x2, axis=axis, eps=eps) self.assertTrue(np.allclose(y.numpy(), np_out)) @@ -129,8 +129,8 @@ class TestCosineSimilarityAPI(unittest.TestCase): np_out = self._get_numpy_out(np_x1, np_x2, axis=axis, eps=eps) cos_sim_func = nn.CosineSimilarity(axis=axis, eps=eps) - tesnor_x1 = paddle.to_variable(np_x1) - tesnor_x2 = paddle.to_variable(np_x2) + tesnor_x1 = paddle.to_tensor(np_x1) + tesnor_x2 = paddle.to_tensor(np_x2) y = cos_sim_func(tesnor_x1, tesnor_x2) self.assertTrue(np.allclose(y.numpy(), np_out)) diff --git a/python/paddle/fluid/tests/unittests/test_cumsum_op.py b/python/paddle/fluid/tests/unittests/test_cumsum_op.py index ad121fac8cc..818e15bb319 100644 --- a/python/paddle/fluid/tests/unittests/test_cumsum_op.py +++ b/python/paddle/fluid/tests/unittests/test_cumsum_op.py 
@@ -21,13 +21,12 @@ import paddle import paddle.fluid.core as core import paddle.fluid as fluid from paddle.fluid import compiler, Program, program_guard -from paddle import to_variable class TestCumsumOp(unittest.TestCase): def run_cases(self): data_np = np.arange(12).reshape(3, 4) - data = to_variable(data_np) + data = paddle.to_tensor(data_np) y = paddle.cumsum(data) z = np.cumsum(data_np) diff --git a/python/paddle/fluid/tests/unittests/test_default_dtype.py b/python/paddle/fluid/tests/unittests/test_default_dtype.py index 057933fc7a7..29ca9a93985 100644 --- a/python/paddle/fluid/tests/unittests/test_default_dtype.py +++ b/python/paddle/fluid/tests/unittests/test_default_dtype.py @@ -20,7 +20,6 @@ import paddle import paddle.fluid as fluid from paddle.fluid.dygraph import Linear import paddle.fluid.core as core -from paddle import to_variable class TestDefaultType(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_directory_migration.py b/python/paddle/fluid/tests/unittests/test_directory_migration.py index 529fff158c5..2f35b45aa67 100644 --- a/python/paddle/fluid/tests/unittests/test_directory_migration.py +++ b/python/paddle/fluid/tests/unittests/test_directory_migration.py @@ -36,7 +36,7 @@ class TestDirectory(unittest.TestCase): def test_new_directory(self): new_directory = [ 'paddle.enable_static', 'paddle.disable_static', - 'paddle.in_dynamic_mode', 'paddle.to_variable', 'paddle.grad', + 'paddle.in_dynamic_mode', 'paddle.to_tensor', 'paddle.grad', 'paddle.no_grad', 'paddle.save', 'paddle.load', 'paddle.static.save', 'paddle.static.load', 'paddle.distributed.ParallelEnv', diff --git a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py index 642044bb4b1..e0e487eff11 100644 --- a/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py +++ b/python/paddle/fluid/tests/unittests/test_flatten_contiguous_range_op.py @@ -195,7 
+195,7 @@ class TestFlattenPython(unittest.TestCase): def test_Negative(): paddle.disable_static() - img = paddle.to_variable(x) + img = paddle.to_tensor(x) out = paddle.flatten(img, start_axis=-2, stop_axis=-1) return out.numpy().shape diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index 22f16287c33..7378975aa37 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -211,7 +211,7 @@ class TestImperative(unittest.TestCase): paddle.disable_static() self.assertTrue(paddle.in_dynamic_mode()) np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32) - var_inp = paddle.to_variable(np_inp) + var_inp = paddle.to_tensor(np_inp) mlp = MLP(input_size=2) out = mlp(var_inp) dy_out1 = out.numpy() @@ -221,7 +221,7 @@ class TestImperative(unittest.TestCase): self.assertFalse(paddle.in_dynamic_mode()) paddle.disable_static() self.assertTrue(paddle.in_dynamic_mode()) - var_inp = paddle.to_variable(np_inp) + var_inp = paddle.to_tensor(np_inp) mlp = MLP(input_size=2) out = mlp(var_inp) dy_out2 = out.numpy() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py index 59ddb365e53..97f7162e997 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows.py @@ -54,7 +54,7 @@ class TestSimpleNet(unittest.TestCase): # grad_clip = fluid.clip.GradientClipByGlobalNorm(5.0) input_word = np.array([[1, 2], [2, 1]]).astype('int64') - input = paddle.to_variable(input_word) + input = paddle.to_tensor(input_word) simplenet = SimpleNet(20, 32, dtype) adam = SGDOptimizer( diff --git a/python/paddle/fluid/tests/unittests/test_isfinite_v2_op.py b/python/paddle/fluid/tests/unittests/test_isfinite_v2_op.py index 8a868e751f0..281dc7caded 
100644 --- a/python/paddle/fluid/tests/unittests/test_isfinite_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_isfinite_v2_op.py @@ -41,7 +41,7 @@ def run_dygraph(x_np, op_str, use_gpu=True): if use_gpu and fluid.core.is_compiled_with_cuda(): place = paddle.CUDAPlace(0) paddle.disable_static(place) - x = paddle.to_variable(x_np) + x = paddle.to_tensor(x_np) dygraph_result = getattr(paddle.tensor, op_str)(x) return dygraph_result diff --git a/python/paddle/fluid/tests/unittests/test_jit_save_load.py b/python/paddle/fluid/tests/unittests/test_jit_save_load.py index 7e6ca8076de..99404246185 100644 --- a/python/paddle/fluid/tests/unittests/test_jit_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_jit_save_load.py @@ -543,9 +543,9 @@ class TestJitSaveMultiCases(unittest.TestCase): loaded_layer = paddle.jit.load(model_path) loaded_layer.eval() # inference & compare - x = paddle.to_variable(np.random.random((1, 784)).astype('float32')) + x = paddle.to_tensor(np.random.random((1, 784)).astype('float32')) if with_label: - y = paddle.to_variable(np.random.random((1, 1)).astype('int64')) + y = paddle.to_tensor(np.random.random((1, 1)).astype('int64')) pred, _ = layer(x, y) pred = pred.numpy() else: @@ -677,7 +677,7 @@ class TestJitSaveMultiCases(unittest.TestCase): model_path = "test_not_prune_output_spec_name_warning" configs = paddle.SaveLoadConfig() - out = paddle.to_variable(np.random.random((1, 1)).astype('float')) + out = paddle.to_tensor(np.random.random((1, 1)).astype('float')) configs.output_spec = [out] paddle.jit.save(layer, model_path, configs=configs) @@ -709,7 +709,7 @@ class TestJitSaveMultiCases(unittest.TestCase): model_path = "test_prune_to_static_after_train" configs = paddle.SaveLoadConfig() - out = paddle.to_variable(np.random.random((1, 1)).astype('float')) + out = paddle.to_tensor(np.random.random((1, 1)).astype('float')) configs.output_spec = [out] with self.assertRaises(ValueError): paddle.jit.save( @@ -730,7 +730,7 @@ class 
TestJitSaveLoadEmptyLayer(unittest.TestCase): def test_save_load_empty_layer(self): layer = EmptyLayer() - x = paddle.to_variable(np.random.random((10)).astype('float32')) + x = paddle.to_tensor(np.random.random((10)).astype('float32')) out = layer(x) paddle.jit.save(layer, self.model_path) load_layer = paddle.jit.load(self.model_path) @@ -746,8 +746,8 @@ class TestJitSaveLoadNoParamLayer(unittest.TestCase): def test_save_load_no_param_layer(self): layer = NoParamLayer() - x = paddle.to_variable(np.random.random((5)).astype('float32')) - y = paddle.to_variable(np.random.random((5)).astype('float32')) + x = paddle.to_tensor(np.random.random((5)).astype('float32')) + y = paddle.to_tensor(np.random.random((5)).astype('float32')) out = layer(x, y) paddle.jit.save(layer, self.model_path) load_layer = paddle.jit.load(self.model_path) diff --git a/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py b/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py index 041fe4e9043..3a3b7071e04 100644 --- a/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_kldiv_loss_op.py @@ -90,7 +90,7 @@ class TestKLDivLossDygraph(unittest.TestCase): with paddle.fluid.dygraph.guard(): kldiv_criterion = paddle.nn.KLDivLoss(reduction) pred_loss = kldiv_criterion( - paddle.to_variable(x), paddle.to_variable(target)) + paddle.to_tensor(x), paddle.to_tensor(target)) self.assertTrue(np.allclose(pred_loss.numpy(), gt_loss)) def test_kl_loss_batchmean(self): diff --git a/python/paddle/fluid/tests/unittests/test_l1_loss.py b/python/paddle/fluid/tests/unittests/test_l1_loss.py index 6a15fe49477..3c37397cae1 100644 --- a/python/paddle/fluid/tests/unittests/test_l1_loss.py +++ b/python/paddle/fluid/tests/unittests/test_l1_loss.py @@ -26,8 +26,8 @@ class TestFunctionalL1Loss(unittest.TestCase): self.label_np = np.random.random(size=(10, 10, 5)).astype(np.float32) def run_imperative(self): - input = paddle.to_variable(self.input_np) - label = 
paddle.to_variable(self.label_np) + input = paddle.to_tensor(self.input_np) + label = paddle.to_tensor(self.label_np) dy_result = paddle.nn.functional.l1_loss(input, label) expected = np.mean(np.abs(self.input_np - self.label_np)) self.assertTrue(np.allclose(dy_result.numpy(), expected)) @@ -106,8 +106,8 @@ class TestClassL1Loss(unittest.TestCase): self.label_np = np.random.random(size=(10, 10, 5)).astype(np.float32) def run_imperative(self): - input = paddle.to_variable(self.input_np) - label = paddle.to_variable(self.label_np) + input = paddle.to_tensor(self.input_np) + label = paddle.to_tensor(self.label_np) l1_loss = paddle.nn.loss.L1Loss() dy_result = l1_loss(input, label) expected = np.mean(np.abs(self.input_np - self.label_np)) diff --git a/python/paddle/fluid/tests/unittests/test_log_softmax.py b/python/paddle/fluid/tests/unittests/test_log_softmax.py index e3d7003eced..9ac4895f499 100644 --- a/python/paddle/fluid/tests/unittests/test_log_softmax.py +++ b/python/paddle/fluid/tests/unittests/test_log_softmax.py @@ -96,7 +96,7 @@ class TestNNLogSoftmaxAPI(unittest.TestCase): # test dygrapg api paddle.disable_static() - x = paddle.to_variable(self.x) + x = paddle.to_tensor(self.x) y = logsoftmax(x) self.assertTrue(np.allclose(y.numpy(), ref_out)) paddle.enable_static() @@ -127,7 +127,7 @@ class TestNNFunctionalLogSoftmaxAPI(unittest.TestCase): self.assertTrue(np.allclose(out[0], ref_out)) paddle.disable_static() - x = paddle.to_variable(self.x) + x = paddle.to_tensor(self.x) y = F.log_softmax(x, axis, dtype) self.assertTrue(np.allclose(y.numpy(), ref_out), True) paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_logsumexp.py b/python/paddle/fluid/tests/unittests/test_logsumexp.py index cf9203dffcb..9032293070a 100644 --- a/python/paddle/fluid/tests/unittests/test_logsumexp.py +++ b/python/paddle/fluid/tests/unittests/test_logsumexp.py @@ -111,7 +111,7 @@ class TestLogsumexpAPI(unittest.TestCase): self.assertTrue(np.allclose(res[0], 
out_ref)) paddle.disable_static(self.place) - x = paddle.to_variable(self.x) + x = paddle.to_tensor(self.x) out = paddle.logsumexp(x, axis, keepdim) self.assertTrue(np.allclose(out.numpy(), out_ref)) paddle.enable_static() @@ -126,7 +126,7 @@ class TestLogsumexpAPI(unittest.TestCase): def test_alias(self): paddle.disable_static(self.place) - x = paddle.to_variable(self.x) + x = paddle.to_tensor(self.x) out1 = paddle.logsumexp(x) out2 = paddle.tensor.logsumexp(x) out3 = paddle.tensor.math.logsumexp(x) diff --git a/python/paddle/fluid/tests/unittests/test_max_op.py b/python/paddle/fluid/tests/unittests/test_max_op.py index c9afc4bec66..4786d790b14 100644 --- a/python/paddle/fluid/tests/unittests/test_max_op.py +++ b/python/paddle/fluid/tests/unittests/test_max_op.py @@ -80,7 +80,7 @@ class ApiMaxTest(unittest.TestCase): def test_imperative_api(self): paddle.disable_static() np_x = np.array([10, 10]).astype('float64') - x = paddle.to_variable(np_x) + x = paddle.to_tensor(np_x) z = paddle.max(x, axis=0) np_z = z.numpy() z_expected = np.array(np.max(np_x, axis=0)) diff --git a/python/paddle/fluid/tests/unittests/test_maximum_op.py b/python/paddle/fluid/tests/unittests/test_maximum_op.py index 5645597007a..54657d7900e 100644 --- a/python/paddle/fluid/tests/unittests/test_maximum_op.py +++ b/python/paddle/fluid/tests/unittests/test_maximum_op.py @@ -61,8 +61,8 @@ class ApiMaximumTest(unittest.TestCase): def test_dynamic_api(self): paddle.disable_static() np_x = np.array([10, 10]).astype('float64') - x = paddle.to_variable(self.input_x) - y = paddle.to_variable(self.input_y) + x = paddle.to_tensor(self.input_x) + y = paddle.to_tensor(self.input_y) z = paddle.maximum(x, y) np_z = z.numpy() z_expected = np.array(np.maximum(self.input_x, self.input_y)) @@ -73,8 +73,8 @@ class ApiMaximumTest(unittest.TestCase): np_x = np.random.rand(5, 4, 3, 2).astype("float64") np_y = np.random.rand(4, 3).astype("float64") - x = paddle.to_variable(self.input_x) - y = 
paddle.to_variable(self.input_y) + x = paddle.to_tensor(self.input_x) + y = paddle.to_tensor(self.input_y) result_1 = paddle.maximum(x, y, axis=1) result_2 = paddle.maximum(x, y, axis=-2) self.assertEqual((result_1.numpy() == result_2.numpy()).all(), True) diff --git a/python/paddle/fluid/tests/unittests/test_mean_op.py b/python/paddle/fluid/tests/unittests/test_mean_op.py index 29e79b096cf..f0094e703cd 100644 --- a/python/paddle/fluid/tests/unittests/test_mean_op.py +++ b/python/paddle/fluid/tests/unittests/test_mean_op.py @@ -204,7 +204,7 @@ class TestMeanAPI(unittest.TestCase): paddle.disable_static(self.place) def test_case(x, axis=None, keepdim=False): - x_tensor = paddle.to_variable(x) + x_tensor = paddle.to_tensor(x) out = paddle.mean(x_tensor, axis, keepdim) if isinstance(axis, list): axis = tuple(axis) diff --git a/python/paddle/fluid/tests/unittests/test_min_op.py b/python/paddle/fluid/tests/unittests/test_min_op.py index b9eff05c5ea..9c15d721635 100644 --- a/python/paddle/fluid/tests/unittests/test_min_op.py +++ b/python/paddle/fluid/tests/unittests/test_min_op.py @@ -80,7 +80,7 @@ class ApiMinTest(unittest.TestCase): def test_imperative_api(self): paddle.disable_static() np_x = np.array([10, 10]).astype('float64') - x = paddle.to_variable(np_x) + x = paddle.to_tensor(np_x) z = paddle.min(x, axis=0) np_z = z.numpy() z_expected = np.array(np.min(np_x, axis=0)) diff --git a/python/paddle/fluid/tests/unittests/test_randn_op.py b/python/paddle/fluid/tests/unittests/test_randn_op.py index 9d2c03f3bba..4ddd98a8a73 100644 --- a/python/paddle/fluid/tests/unittests/test_randn_op.py +++ b/python/paddle/fluid/tests/unittests/test_randn_op.py @@ -63,7 +63,7 @@ class TestRandnOpForDygraph(unittest.TestCase): dim_2 = paddle.fill_constant([1], "int32", 50) x3 = paddle.randn(shape=[dim_1, dim_2, 784]) - var_shape = paddle.to_variable(np.array(shape)) + var_shape = paddle.to_tensor(np.array(shape)) x4 = paddle.randn(var_shape) for out in [x1, x2, x3, x4]: diff --git 
a/python/paddle/fluid/tests/unittests/test_retain_graph.py b/python/paddle/fluid/tests/unittests/test_retain_graph.py index 9abbee17385..98c7e3800c2 100644 --- a/python/paddle/fluid/tests/unittests/test_retain_graph.py +++ b/python/paddle/fluid/tests/unittests/test_retain_graph.py @@ -105,8 +105,8 @@ class TestRetainGraph(unittest.TestCase): A = np.random.rand(2, 3, 32, 32).astype('float32') B = np.random.rand(2, 3, 32, 32).astype('float32') - realA = paddle.to_variable(A) - realB = paddle.to_variable(B) + realA = paddle.to_tensor(A) + realB = paddle.to_tensor(B) fakeB = g(realA) optim_d.clear_gradients() diff --git a/python/paddle/fluid/tests/unittests/test_transformer_api.py b/python/paddle/fluid/tests/unittests/test_transformer_api.py index 7c7a71a3be1..067d1ea5f73 100644 --- a/python/paddle/fluid/tests/unittests/test_transformer_api.py +++ b/python/paddle/fluid/tests/unittests/test_transformer_api.py @@ -487,24 +487,24 @@ class TestTransformer(unittest.TestCase): dropout=dropout, weight_attr=[None], bias_attr=[False]) - src = paddle.to_variable( + src = paddle.to_tensor( np.random.rand(batch_size, source_length, d_model).astype( "float32")) - tgt = paddle.to_variable( + tgt = paddle.to_tensor( np.random.rand(batch_size, target_length, d_model).astype( "float32")) src_mask = np.zeros((batch_size, n_head, source_length, source_length)).astype("float32") src_mask[0][0][0][0] = -np.inf - src_mask = paddle.to_variable(src_mask) + src_mask = paddle.to_tensor(src_mask) tgt_mask = np.zeros((batch_size, n_head, target_length, target_length)).astype("float32") tgt_mask[0][0][0][0] = -1e9 memory_mask = np.zeros((batch_size, n_head, target_length, source_length)).astype("float32") memory_mask[0][0][0][0] = -1e9 - tgt_mask, memory_mask = paddle.to_variable( - tgt_mask), paddle.to_variable(memory_mask) + tgt_mask, memory_mask = paddle.to_tensor( + tgt_mask), paddle.to_tensor(memory_mask) trans_output = transformer(src, tgt, src_mask, tgt_mask, memory_mask) @@ -521,24 +521,24 
@@ class TestTransformer(unittest.TestCase): dropout=dropout, weight_attr=[None, None], bias_attr=[False, False]) - src = paddle.to_variable( + src = paddle.to_tensor( np.random.rand(batch_size, source_length, d_model).astype( "float32")) - tgt = paddle.to_variable( + tgt = paddle.to_tensor( np.random.rand(batch_size, target_length, d_model).astype( "float32")) src_mask = np.zeros((batch_size, n_head, source_length, source_length)).astype("float32") src_mask[0][0][0][0] = -np.inf - src_mask = paddle.to_variable(src_mask) + src_mask = paddle.to_tensor(src_mask) tgt_mask = np.zeros((batch_size, n_head, target_length, target_length)).astype("float32") tgt_mask[0][0][0][0] = -1e9 memory_mask = np.zeros((batch_size, n_head, target_length, source_length)).astype("float32") memory_mask[0][0][0][0] = -1e9 - tgt_mask, memory_mask = paddle.to_variable( - tgt_mask), paddle.to_variable(memory_mask) + tgt_mask, memory_mask = paddle.to_tensor( + tgt_mask), paddle.to_tensor(memory_mask) trans_output = transformer(src, tgt, src_mask, tgt_mask, memory_mask) @@ -555,24 +555,24 @@ class TestTransformer(unittest.TestCase): dropout=dropout, weight_attr=[None, None, None], bias_attr=[False, False, True]) - src = paddle.to_variable( + src = paddle.to_tensor( np.random.rand(batch_size, source_length, d_model).astype( "float32")) - tgt = paddle.to_variable( + tgt = paddle.to_tensor( np.random.rand(batch_size, target_length, d_model).astype( "float32")) src_mask = np.zeros((batch_size, n_head, source_length, source_length)).astype("float32") src_mask[0][0][0][0] = -np.inf - src_mask = paddle.to_variable(src_mask) + src_mask = paddle.to_tensor(src_mask) tgt_mask = np.zeros((batch_size, n_head, target_length, target_length)).astype("float32") tgt_mask[0][0][0][0] = -1e9 memory_mask = np.zeros((batch_size, n_head, target_length, source_length)).astype("float32") memory_mask[0][0][0][0] = -1e9 - tgt_mask, memory_mask = paddle.to_variable( - tgt_mask), paddle.to_variable(memory_mask) + tgt_mask, 
memory_mask = paddle.to_tensor( + tgt_mask), paddle.to_tensor(memory_mask) trans_output = transformer(src, tgt, src_mask, tgt_mask, memory_mask) @@ -588,24 +588,24 @@ class TestTransformer(unittest.TestCase): dim_feedforward=dim_feedforward, dropout=dropout, bias_attr=False) - src = paddle.to_variable( + src = paddle.to_tensor( np.random.rand(batch_size, source_length, d_model).astype( "float32")) - tgt = paddle.to_variable( + tgt = paddle.to_tensor( np.random.rand(batch_size, target_length, d_model).astype( "float32")) src_mask = np.zeros((batch_size, n_head, source_length, source_length)).astype("float32") src_mask[0][0][0][0] = -np.inf - src_mask = paddle.to_variable(src_mask) + src_mask = paddle.to_tensor(src_mask) tgt_mask = np.zeros((batch_size, n_head, target_length, target_length)).astype("float32") tgt_mask[0][0][0][0] = -1e9 memory_mask = np.zeros((batch_size, n_head, target_length, source_length)).astype("float32") memory_mask[0][0][0][0] = -1e9 - tgt_mask, memory_mask = paddle.to_variable( - tgt_mask), paddle.to_variable(memory_mask) + tgt_mask, memory_mask = paddle.to_tensor( + tgt_mask), paddle.to_tensor(memory_mask) trans_output = transformer(src, tgt, src_mask, tgt_mask, memory_mask) diff --git a/python/paddle/fluid/tests/unittests/test_zeros_like_op.py b/python/paddle/fluid/tests/unittests/test_zeros_like_op.py index 21e618a4620..2cea3072809 100644 --- a/python/paddle/fluid/tests/unittests/test_zeros_like_op.py +++ b/python/paddle/fluid/tests/unittests/test_zeros_like_op.py @@ -63,7 +63,7 @@ class TestZerosLikeImpeartive(unittest.TestCase): place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( ) else fluid.CPUPlace() paddle.disable_static(place) - x = paddle.to_variable(np.ones(shape)) + x = paddle.to_tensor(np.ones(shape)) for dtype in [np.bool, np.float32, np.float64, np.int32, np.int64]: out = zeros_like(x, dtype) self.assertEqual((out.numpy() == np.zeros(shape, dtype)).all(), diff --git a/python/paddle/tensor/linalg.py 
b/python/paddle/tensor/linalg.py index 26624d3b5ff..15580b6618e 100644 --- a/python/paddle/tensor/linalg.py +++ b/python/paddle/tensor/linalg.py @@ -707,20 +707,14 @@ def cross(x, y, axis=None, name=None): Examples: .. code-block:: python import paddle - from paddle import to_variable - import numpy as np - paddle.disable_static() - data_x = np.array([[1.0, 1.0, 1.0], - [2.0, 2.0, 2.0], - [3.0, 3.0, 3.0]]) - data_y = np.array([[1.0, 1.0, 1.0], - [1.0, 1.0, 1.0], - [1.0, 1.0, 1.0]]) - x = to_variable(data_x) - y = to_variable(data_y) - + x = paddle.to_tensor([[1.0, 1.0, 1.0], + [2.0, 2.0, 2.0], + [3.0, 3.0, 3.0]]) + y = paddle.to_tensor([[1.0, 1.0, 1.0], + [1.0, 1.0, 1.0], + [1.0, 1.0, 1.0]]) z1 = paddle.cross(x, y) print(z1.numpy()) # [[-1. -1. -1.] diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 966544c7abb..ce32fb76f5c 100755 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -1650,12 +1650,11 @@ def cumsum(x, axis=None, dtype=None, name=None): .. 
code-block:: python import paddle - from paddle import to_variable import numpy as np paddle.disable_static() data_np = np.arange(12).reshape(3, 4) - data = to_variable(data_np) + data = paddle.to_tensor(data_np) y = paddle.cumsum(data) print(y.numpy()) diff --git a/tools/wlist.json b/tools/wlist.json index 0ed0b4e4069..9b36ac6adc7 100644 --- a/tools/wlist.json +++ b/tools/wlist.json @@ -251,9 +251,10 @@ "BilinearTensorProduct", "GroupNorm", "SpectralNorm", - "TreeConv", + "TreeConv" + ], + "wlist_temp":[ "prroi_pool", - "to_tensor", "ChunkEvaluator", "EditDistance", "ErrorClipByValue", -- GitLab From 35074963e359ba9ce5e38279fc1205bcee67157d Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Sun, 27 Sep 2020 12:17:50 +0800 Subject: [PATCH 114/117] Refine error msg in paddle/fluid/framework/details [part 2] (#27429) * refine broadcast_op_handle * refine some error messages * refine some files * fix bug * fix bug * fix bug * follow comments * follow comments --- .../framework/details/all_reduce_op_handle.cc | 4 +- .../framework/details/broadcast_op_handle.cc | 32 ++++-- .../details/broadcast_op_handle_test.h | 41 ++++++-- .../fluid/framework/details/build_strategy.cc | 3 +- .../details/eager_deletion_op_handle.cc | 40 +++++--- .../details/fused_all_reduce_op_handle.cc | 85 +++++++++++++--- .../details/fused_broadcast_op_handle.cc | 11 ++- .../details/fused_broadcast_op_handle_test.cc | 5 +- .../framework/details/gather_op_handle.cc | 39 ++++++-- .../details/gather_op_handle_test.cc | 28 +++++- .../fluid/framework/details/nccl_op_handle.h | 61 +++++++----- .../fluid/framework/details/op_handle_base.cc | 49 +++++---- paddle/fluid/framework/details/op_registry.h | 14 +-- .../framework/details/reduce_and_gather.h | 28 +++++- .../framework/details/reduce_op_handle.cc | 57 ++++++++--- .../details/reduce_op_handle_test.cc | 42 ++++++-- .../details/share_tensor_buffer_functor.cc | 9 +- .../details/sparse_all_reduce_op_handle.cc | 99 ++++++++++++++----- 18 files changed, 475 
insertions(+), 172 deletions(-) diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.cc b/paddle/fluid/framework/details/all_reduce_op_handle.cc index 939a2fc8fc9..78887f3ac51 100644 --- a/paddle/fluid/framework/details/all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/all_reduce_op_handle.cc @@ -76,7 +76,7 @@ void AllReduceOpHandle::AllReduceImpl( platform::errors::InvalidArgument( "The NoDummyInputSize should be equal " "to the number of places, but got NoDummyInputSize is " - "%d and the number of place is %d.", + "%d and the number of places is %d.", in_var_handles.size(), num_places)); PADDLE_ENFORCE_EQ( in_var_handles.size(), out_var_handles.size(), @@ -89,7 +89,7 @@ void AllReduceOpHandle::AllReduceImpl( platform::errors::InvalidArgument( "The number of local scopes should be equal " "to the number of places, but got the number of local scopes is " - "%d and the number of place is %d.", + "%d and the number of places is %d.", in_var_handles.size(), num_places)); std::vector lod_tensor_data; diff --git a/paddle/fluid/framework/details/broadcast_op_handle.cc b/paddle/fluid/framework/details/broadcast_op_handle.cc index 4c3b0a7c6a4..35b10660674 100644 --- a/paddle/fluid/framework/details/broadcast_op_handle.cc +++ b/paddle/fluid/framework/details/broadcast_op_handle.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/broadcast_op_handle.h" + #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/variable_visitor.h" #include "paddle/fluid/platform/profiler.h" @@ -31,10 +32,15 @@ void BroadcastOpHandle::RunImpl() { auto out_var_handles = DynamicCast(outputs_); PADDLE_ENFORCE_EQ(in_var_handles.size(), 1UL, - "The number of input should be one."); - PADDLE_ENFORCE_EQ( - out_var_handles.size(), places_.size(), - "The number of output should equal to the number of places."); + platform::errors::PreconditionNotMet( + "The number of inputs should be 1, but got %d.", + in_var_handles.size())); + PADDLE_ENFORCE_EQ(out_var_handles.size(), places_.size(), + platform::errors::PreconditionNotMet( + "The number of outputs and the number of places should " + "be equal, but got the number of outputs is %d and the " + "number of places is %d.", + out_var_handles.size(), places_.size())); VarHandle *in_var_handle = in_var_handles[0]; @@ -47,7 +53,9 @@ void BroadcastOpHandle::BroadcastOneVar( const std::vector &var_scopes) { auto *in_var = var_scopes.at(in_var_handle.scope_idx())->FindVar(in_var_handle.name()); - PADDLE_ENFORCE_NOT_NULL(in_var); + PADDLE_ENFORCE_NOT_NULL( + in_var, platform::errors::NotFound("Variable %s is not found in scopes.", + in_var_handle.name())); Tensor &in_tensor = VariableVisitor::GetMutableTensor(in_var); if (UNLIKELY(!in_tensor.IsInitialized())) { VLOG(3) << "in var " << in_var_handle.name() << "not inited, return!"; @@ -103,7 +111,7 @@ void BroadcastOpHandle::BroadcastOneVar( broadcast_calls.emplace_back( [send_recv_buffer, numel, type, root_id, &nccl_ctx] { - PADDLE_ENFORCE(platform::dynload::ncclBcast( + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclBcast( send_recv_buffer, numel, static_cast(type), root_id, nccl_ctx.comm_, nccl_ctx.stream())); }); @@ -131,7 +139,8 @@ void BroadcastOpHandle::BroadcastOneVar( nccl_ctxs_->DevCtx(p)->Wait(); } #else - PADDLE_THROW("CUDA is not 
enabled."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with NCCL.")); #endif } } @@ -154,10 +163,13 @@ void BroadcastOpHandle::InitOutputValue( auto t_out_p = out_var_handle->place(); auto *out_var = var_scopes.at(out_var_handle->scope_idx()) ->FindVar(out_var_handle->name()); - PADDLE_ENFORCE_NOT_NULL(out_var); + PADDLE_ENFORCE_NOT_NULL(out_var, platform::errors::NotFound( + "Variable %s is not found in scopes.", + out_var_handle->name())); if (is_gpu_place(in_tensor.place())) { - PADDLE_ENFORCE(platform::is_gpu_place(t_out_p), - "Places of input and output must be all on GPU."); + PADDLE_ENFORCE_EQ(platform::is_gpu_place(t_out_p), true, + platform::errors::PreconditionNotMet( + "Places of input and output must be all on GPU.")); } else { t_out_p = platform::CPUPlace(); } diff --git a/paddle/fluid/framework/details/broadcast_op_handle_test.h b/paddle/fluid/framework/details/broadcast_op_handle_test.h index e455879a68f..4fdc420e1e0 100644 --- a/paddle/fluid/framework/details/broadcast_op_handle_test.h +++ b/paddle/fluid/framework/details/broadcast_op_handle_test.h @@ -79,7 +79,8 @@ struct TestBroadcastOpHandle { } nccl_ctxs_.reset(new platform::NCCLContextMap(place_list_)); #else - PADDLE_THROW("CUDA is not support."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with NCCL.")); #endif } else { int count = 8; @@ -113,7 +114,8 @@ struct TestBroadcastOpHandle { op_handle_ = new BroadcastOpHandle(nodes_.back().get(), local_scopes_, place_list_, nccl_ctxs_.get()); #else - PADDLE_THROW("CUDA is not support."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with NCCL.")); #endif } else { #if defined(PADDLE_WITH_NCCL) @@ -171,7 +173,9 @@ struct TestBroadcastOpHandle { float val_scalar = 0.0) { auto var = param_scopes_[input_scope_idx]->FindVar(varname); - PADDLE_ENFORCE_NOT_NULL(var); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound("Variable %s is not found in scope.", + varname)); auto
lod_tensor = var->GetMutable(); std::vector send_vector(static_cast(f::product(kDims))); for (size_t k = 0; k < send_vector.size(); ++k) { @@ -194,7 +198,9 @@ struct TestBroadcastOpHandle { } auto var = param_scopes_[input_scope_idx]->FindVar(varname); - PADDLE_ENFORCE_NOT_NULL(var); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound("Variable %s is not found in scope.", + varname)); auto selected_rows = var->GetMutable(); auto value = selected_rows->mutable_value(); value->mutable_data(kDims, place_list_[input_scope_idx]); @@ -211,13 +217,24 @@ struct TestBroadcastOpHandle { const std::vector& send_vector, const std::vector& rows, int height) { auto var = param_scopes_[input_scope_idx]->FindVar(varname); - PADDLE_ENFORCE_NOT_NULL(var); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound("Variable %s is not found in scope.", + varname)); auto& selected_rows = var->Get(); auto rt = selected_rows.value(); - PADDLE_ENFORCE_EQ(selected_rows.height(), height, "height is not equal."); + PADDLE_ENFORCE_EQ(selected_rows.height(), height, + platform::errors::InvalidArgument( + "The height of SelectedRows is not equal to " + "the expected, expect %d, but got %ld.", + height, selected_rows.height())); for (size_t k = 0; k < selected_rows.rows().size(); ++k) { - PADDLE_ENFORCE_EQ(selected_rows.rows()[k], rows[k]); + PADDLE_ENFORCE_EQ( + selected_rows.rows()[k], rows[k], + platform::errors::InvalidArgument( + "The item at position %zu of rows of SelectedRows " + "is not equal to the expected, expect %ld, but got %ld.", + k, rows[k], selected_rows.rows()[k])); } p::CPUPlace cpu_place; @@ -235,9 +252,15 @@ struct TestBroadcastOpHandle { framework::Scope* scope) { p::CPUPlace cpu_place; auto var = scope->FindVar(varname); - PADDLE_ENFORCE_NOT_NULL(var); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound("Variable %s is not found in scope.", + varname)); auto tensor = var->Get(); - PADDLE_ENFORCE_EQ(tensor.lod(), lod, "lod is not equal."); + 
PADDLE_ENFORCE_EQ(tensor.lod(), lod, + platform::errors::InvalidArgument( + "The LoD of tensor is not equal to " + "the expected, expect %s, but got %s.", + lod, tensor.lod())); f::Tensor result_tensor; f::TensorCopySync(tensor, cpu_place, &result_tensor); float* ct = result_tensor.mutable_data(cpu_place); diff --git a/paddle/fluid/framework/details/build_strategy.cc b/paddle/fluid/framework/details/build_strategy.cc index ecdb8cc9b8c..962f968c84e 100644 --- a/paddle/fluid/framework/details/build_strategy.cc +++ b/paddle/fluid/framework/details/build_strategy.cc @@ -235,7 +235,8 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder { AppendPass("reduce_mode_multi_devices_pass").get(); break; default: - PADDLE_THROW("Unknown reduce strategy."); + PADDLE_THROW( + platform::errors::Unimplemented("Unknown reduce strategy.")); } } multi_devices_pass->SetNotOwned("strategy", diff --git a/paddle/fluid/framework/details/eager_deletion_op_handle.cc b/paddle/fluid/framework/details/eager_deletion_op_handle.cc index 7735f9720c1..266557cb855 100644 --- a/paddle/fluid/framework/details/eager_deletion_op_handle.cc +++ b/paddle/fluid/framework/details/eager_deletion_op_handle.cc @@ -12,11 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "paddle/fluid/framework/details/eager_deletion_op_handle.h" + #include #include #include -#include "paddle/fluid/framework/details/eager_deletion_op_handle.h" #include "paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h" #include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/scope.h" @@ -47,15 +48,19 @@ EagerDeletionOpHandle::EagerDeletionOpHandle( if (dynamic_cast(gc_)) { platform::CUDADeviceGuard guard( BOOST_GET_CONST(platform::CUDAPlace, place).device); - PADDLE_ENFORCE(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming)); - PADDLE_ENFORCE_NOT_NULL(event_); + PADDLE_ENFORCE_CUDA_SUCCESS( + cudaEventCreateWithFlags(&event_, cudaEventDisableTiming)); + PADDLE_ENFORCE_NOT_NULL(event_, platform::errors::InvalidArgument( + "The cuda event created is NULL.")); } } #endif - PADDLE_ENFORCE_NE(vars.empty(), true, platform::errors::InvalidArgument( - "Variable names are empty.")); + PADDLE_ENFORCE_NE(vars.empty(), true, + platform::errors::InvalidArgument( + "The variables to be deleted are empty.")); for (auto *var : var_infos_) { - PADDLE_ENFORCE_NOT_NULL(var); + PADDLE_ENFORCE_NOT_NULL(var, platform::errors::InvalidArgument( + "The memory optimization info is NULL.")); } } @@ -64,7 +69,7 @@ EagerDeletionOpHandle::~EagerDeletionOpHandle() { if (event_) { auto gpu_place = BOOST_GET_CONST(platform::CUDAPlace, dev_ctx_->GetPlace()); platform::CUDADeviceGuard guard(gpu_place.device); - PADDLE_ENFORCE(cudaEventDestroy(event_)); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventDestroy(event_)); } #endif } @@ -78,12 +83,17 @@ void EagerDeletionOpHandle::InitCUDA() { } void EagerDeletionOpHandle::CallOnce() { - PADDLE_ENFORCE(vars_.empty(), "vars_ must be initialized here"); + PADDLE_ENFORCE_EQ( + vars_.empty(), true, + platform::errors::InvalidArgument( + "The variables to be deleted should be initialized here.")); Scope *exec_scope = local_exec_scopes_[0]; for (auto *var_info : var_infos_) { auto *var =
exec_scope->FindVar(var_info->Name()); - PADDLE_ENFORCE_NOT_NULL(var, "Variable %s should not be nullptr", - var_info->Name()); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound( + "The variable(%s) to be deleted is not found in scope.", + var_info->Name())); vars_.emplace_back(var); } } @@ -119,8 +129,9 @@ void EagerDeletionOpHandle::RunImpl() { garbages.emplace_back(t.MoveMemoryHolder()); } } else { - PADDLE_THROW("Type %s of %s is not supported eager deletion", - framework::ToTypeName(var->Type()), var_info->Name()); + PADDLE_THROW(platform::errors::Unimplemented( + "The variable(%s) of type %s is not supported in eager deletion.", + var_info->Name(), framework::ToTypeName(var->Type()))); } } @@ -137,8 +148,9 @@ void EagerDeletionOpHandle::ClearGarbages( auto callback_stream = reinterpret_cast(gc_)->stream(); auto callback_func = [=]() { - PADDLE_ENFORCE(cudaEventRecord(event_, compute_stream)); - PADDLE_ENFORCE(cudaStreamWaitEvent(callback_stream, event_, 0)); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventRecord(event_, compute_stream)); + PADDLE_ENFORCE_CUDA_SUCCESS( + cudaStreamWaitEvent(callback_stream, event_, 0)); }; gc_->Add(std::move(*garbages), callback_func); } else { diff --git a/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc b/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc index c67e21d5c47..c5388116699 100644 --- a/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License.
#include "paddle/fluid/framework/details/fused_all_reduce_op_handle.h" + #include #include + #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/reduce_and_gather.h" #include "paddle/fluid/framework/details/variable_visitor.h" @@ -56,10 +58,20 @@ void FusedAllReduceOpHandle::RunImpl() { size_t place_num = places_.size(); PADDLE_ENFORCE_EQ( in_var_handles.size(), place_num * num_of_all_reduce_, - "The NoDummyInputSize should be equal to the number of places."); + platform::errors::PreconditionNotMet( + "The number of input variable handles should be equal to the number " + "of places multiplied by the number of all reduce handles, " + "but got the number of input variable handles is %d, the " + "number of places is %d, and the number of all reduce handles " + "is %d.", + in_var_handles.size(), place_num, num_of_all_reduce_)); PADDLE_ENFORCE_EQ( in_var_handles.size(), out_var_handles.size(), - "The NoDummyInputSize and NoDummyOutputSize should be equal."); + platform::errors::PreconditionNotMet( + "The number of input variable handles should be equal to the number " + "of output variable handles, but got the number of input variable " + "handles is %d, and the number of output variable handles is %d.", + in_var_handles.size(), out_var_handles.size())); // Note: some gradient op doesn't have CUDAKernel, so the gradients of // those op are in CPUPlace, in this case, the all reduce should not be fused. @@ -106,7 +118,13 @@ void FusedAllReduceOpHandle::FusedAllReduceFunc( dtype = ele_dtype; } - PADDLE_ENFORCE_EQ(ele_dtype, dtype); + PADDLE_ENFORCE_EQ( + ele_dtype, dtype, + platform::errors::InvalidArgument( + "The DataType of grad tensors of fused_all_reduce_op_handle " + "must be consistent. The current dtype is %s, but the " + "previous dtype is %s.", + DataTypeToString(ele_dtype), DataTypeToString(dtype))); // Check whether the address space is contiguous.
std::sort( @@ -130,16 +148,29 @@ void FusedAllReduceOpHandle::FusedAllReduceFunc( "input[%d] address: 0X%02x. The offset: %d", k - 1, g_tensor.at(k - 1).first, cur_address, g_tensor.at(k).first, k, next_address, k, infer_next_address, offset); - PADDLE_ENFORCE_EQ(infer_next_address, next_address, - "The address is not consistent."); + PADDLE_ENFORCE_EQ( + infer_next_address, next_address, + platform::errors::InvalidArgument( + "The inferred address of the next tensor should be equal to the " + "real address of the next tensor. But got inferred address is %p " + "and real address is %p.", + infer_next_address, next_address)); } } if (!FLAGS_skip_fused_all_reduce_check) { for (size_t scope_idx = 0; scope_idx < place_num; ++scope_idx) { for (size_t j = 1; j < num_of_all_reduce_; ++j) { - PADDLE_ENFORCE_EQ(grads_tensor.at(0).at(j).first, - grads_tensor.at(scope_idx).at(j).first); + PADDLE_ENFORCE_EQ( + grads_tensor.at(0).at(j).first, + grads_tensor.at(scope_idx).at(j).first, + platform::errors::InvalidArgument( + "The variable name of grad tensors of " + "fused_all_reduce_op_handle " + "must be consistent.
The current name is %s, but the " + "previous name is %s.", + grads_tensor.at(0).at(j).first, + grads_tensor.at(scope_idx).at(j).first)); } } } @@ -167,7 +198,9 @@ bool FusedAllReduceOpHandle::InputIsInDifferentPlace( for (size_t j = 0; j < in_var_handles.size(); j += place_num) { auto var_name = in_var_handles[j]->name(); auto var = local_scope->FindVar(var_name); - PADDLE_ENFORCE_NOT_NULL(var, "%s is not found in local scope.", var_name); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound( + "The variable '%s' is not found in local scope.", var_name)); auto &lod_tensor = var->Get(); if (!is_same_place(lod_tensor.place(), places_.at(scope_idx))) { return true; @@ -185,14 +218,24 @@ void FusedAllReduceOpHandle::GetGradLoDTensor( size_t place_num = places_.size(); for (size_t j = 0; j < in_var_handles.size(); j += place_num) { auto var_name = in_var_handles[j]->name(); - PADDLE_ENFORCE_EQ(var_name, out_var_handles[j]->name()); + PADDLE_ENFORCE_EQ( + var_name, out_var_handles[j]->name(), + platform::errors::InvalidArgument( + "The name of input variable should be equal " + "to the name of output variable. 
But got the name of input " + "variable is %s and the name of output variable is %s.", + var_name, out_var_handles[j]->name())); auto var = local_scope->FindVar(var_name); - PADDLE_ENFORCE_NOT_NULL(var, "%s is not found in local scope.", var_name); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound( + "The variable '%s' is not found in local scope.", var_name)); auto &lod_tensor = var->Get(); PADDLE_ENFORCE_EQ( platform::is_same_place(lod_tensor.place(), places_.at(scope_idx)), - true, "%s(%d) is not in the right place.", var_name, scope_idx); + true, platform::errors::InvalidArgument( + "The variable '%s' at scope %d is not in the right place.", + var_name, scope_idx)); grad_tensor->emplace_back(std::make_pair(var_name, &lod_tensor)); } } @@ -204,16 +247,26 @@ void FusedAllReduceOpHandle::GetDTypeAndNumel( size_t size_of_dtype = 0; for (size_t i = 0; i < grad_tensor.size(); ++i) { // Get dtype - auto ele_type = grad_tensor.at(i).second->type(); + auto ele_dtype = grad_tensor.at(i).second->type(); if (i == 0) { - *dtype = ele_type; - size_of_dtype = framework::SizeOfType(ele_type); + *dtype = ele_dtype; + size_of_dtype = framework::SizeOfType(ele_dtype); } - PADDLE_ENFORCE_EQ(ele_type, *dtype); + PADDLE_ENFORCE_EQ( + ele_dtype, *dtype, + platform::errors::InvalidArgument( + "The DataType of grad tensors of fused_all_reduce_op_handle " + "must be consistent. 
The current dtype is %s, but the " + "previous dtype is %s.", + DataTypeToString(ele_dtype), DataTypeToString(*dtype))); // Get element number int64_t len = grad_tensor.at(i).second->numel(); - PADDLE_ENFORCE_GT(len, 0); + PADDLE_ENFORCE_GT( + len, 0, platform::errors::InvalidArgument( + "The size of grad tensors of fused_all_reduce_op_handle " + "must be > 0, but got %d.", + len)); *numel += platform::Alignment(len * size_of_dtype, places_[0]) / size_of_dtype; } diff --git a/paddle/fluid/framework/details/fused_broadcast_op_handle.cc b/paddle/fluid/framework/details/fused_broadcast_op_handle.cc index 59c5da0de8c..1ae09dcde9f 100644 --- a/paddle/fluid/framework/details/fused_broadcast_op_handle.cc +++ b/paddle/fluid/framework/details/fused_broadcast_op_handle.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/details/fused_broadcast_op_handle.h" + #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/variable_visitor.h" #include "paddle/fluid/platform/profiler.h" @@ -32,7 +33,15 @@ void FusedBroadcastOpHandle::RunImpl() { WaitInputVarGenerated(); size_t place_num = places_.size(); - PADDLE_ENFORCE_EQ(in_var_handles.size() * place_num, out_var_handles.size()); + PADDLE_ENFORCE_EQ( + in_var_handles.size() * place_num, out_var_handles.size(), + platform::errors::PreconditionNotMet( + "The number of input variable handles multiplied by the number " + "of places should be equal to the number of output variable handles, " + "but got the number of input variable handles is %d, the " + "number of places is %d, and the number of output variable handles " + "is %d.", + in_var_handles.size(), place_num, out_var_handles.size())); for (size_t i = 0; i < in_var_handles.size(); ++i) { BroadcastOneVar( diff --git a/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc b/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc index 761a5b5a30a..ce7621d4e35 100644 ---
a/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc +++ b/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc @@ -13,8 +13,10 @@ // limitations under the License. #include "paddle/fluid/framework/details/fused_broadcast_op_handle.h" + #include #include + #include "gtest/gtest.h" #include "paddle/fluid/framework/details/broadcast_op_handle_test.h" #include "paddle/fluid/framework/details/op_handle_base.h" @@ -58,7 +60,8 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle { op_handle_ = new FusedBroadcastOpHandle( nodes_.back().get(), local_scopes_, place_list_, nccl_ctxs_.get()); #else - PADDLE_THROW("CUDA is not supported."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with CUDA.")); #endif } else { #if defined(PADDLE_WITH_NCCL) diff --git a/paddle/fluid/framework/details/gather_op_handle.cc b/paddle/fluid/framework/details/gather_op_handle.cc index a039c6200e3..2d3b2fb39af 100644 --- a/paddle/fluid/framework/details/gather_op_handle.cc +++ b/paddle/fluid/framework/details/gather_op_handle.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/gather_op_handle.h" + #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/variable_visitor.h" @@ -32,13 +33,20 @@ void GatherOpHandle::RunImpl() { PADDLE_ENFORCE_EQ( in_var_handles.size(), places_.size(), - "The number of output should equal to the number of places."); + platform::errors::InvalidArgument( + "The number of input variables should be equal " + "to the number of places, but got the number of input variables is " + "%d and the number of places is %d.", + in_var_handles.size(), places_.size())); VarHandle *out_var_handle; { auto out_var_handles = DynamicCast(this->Outputs()); - PADDLE_ENFORCE_EQ(out_var_handles.size(), 1, - "The number of output should be one."); + PADDLE_ENFORCE_EQ( + out_var_handles.size(), 1, + platform::errors::InvalidArgument( + "The number of output variables should be 1, but got %d.", + out_var_handles.size())); out_var_handle = out_var_handles.front(); } @@ -47,10 +55,14 @@ void GatherOpHandle::RunImpl() { auto in_0_handle = in_var_handles[0]; auto pre_in_var = var_scopes.at(in_0_handle->scope_idx())->FindVar(in_0_handle->name()); - PADDLE_ENFORCE_NOT_NULL(pre_in_var); + PADDLE_ENFORCE_NOT_NULL( + pre_in_var, + platform::errors::NotFound("The variable '%s' is not found in the scope.", + in_0_handle->name())); - PADDLE_ENFORCE(pre_in_var->IsType(), - "Currently, gather_op only can gather SelectedRows."); + PADDLE_ENFORCE_EQ(pre_in_var->IsType(), true, + platform::errors::Unimplemented( + "Currently, gather_op only supports SelectedRows.")); // Wait input done, this Wait is asynchronous operation WaitInputVarGenerated(); @@ -63,7 +75,10 @@ void GatherOpHandle::RunImpl() { for (auto *in_handle : in_var_handles) { auto *in_var = var_scopes.at(in_handle->scope_idx())->FindVar(in_handle->name()); - PADDLE_ENFORCE_NOT_NULL(in_var); + PADDLE_ENFORCE_NOT_NULL( + in_var, + platform::errors::NotFound( + "The variable '%s' is not found in the scope.", 
in_handle->name())); VariableVisitor::EnforceShapeAndDTypeEQ(*in_var, *pre_in_var); auto &in_sr_value = in_var->Get(); @@ -76,15 +91,19 @@ void GatherOpHandle::RunImpl() { // NOTE: The Places of all input tensor must be all on CPU or all on GPU. platform::Place t_out_p = out_var_handle->place(); if (platform::is_gpu_place(pre_in_value.place())) { - PADDLE_ENFORCE(platform::is_gpu_place(t_out_p), - "Places of input and output must be all on GPU."); + PADDLE_ENFORCE_EQ(platform::is_gpu_place(t_out_p), true, + platform::errors::PreconditionNotMet( + "Places of input and output must be all on GPU.")); } else { t_out_p = platform::CPUPlace(); } auto out_var = var_scopes.at(out_var_handle->scope_idx()) ->FindVar(out_var_handle->name()); - PADDLE_ENFORCE_NOT_NULL(out_var); + PADDLE_ENFORCE_NOT_NULL( + out_var, + platform::errors::NotFound("The variable '%s' is not found in the scope.", + out_var_handle->name())); auto out_value = out_var->GetMutable(); out_value->set_height(pre_in_value.height()); out_value->set_rows(out_rows); diff --git a/paddle/fluid/framework/details/gather_op_handle_test.cc b/paddle/fluid/framework/details/gather_op_handle_test.cc index f3fcc1a436d..60c1d0d39a5 100644 --- a/paddle/fluid/framework/details/gather_op_handle_test.cc +++ b/paddle/fluid/framework/details/gather_op_handle_test.cc @@ -13,8 +13,10 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/gather_op_handle.h" + #include #include + #include "gtest/gtest.h" namespace paddle { @@ -60,7 +62,8 @@ struct TestGatherOpHandle { ctxs_.emplace_back(new p::CUDADeviceContext(p)); } #else - PADDLE_THROW("CUDA is not support."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with CUDA.")); #endif } else { int count = 8; @@ -141,7 +144,9 @@ struct TestGatherOpHandle { for (size_t input_scope_idx = 0; input_scope_idx < gpu_list_.size(); ++input_scope_idx) { auto in_var = param_scopes_.at(input_scope_idx)->FindVar("input"); - PADDLE_ENFORCE_NOT_NULL(in_var); + PADDLE_ENFORCE_NOT_NULL( + in_var, platform::errors::NotFound( + "The variable '%s' is not found in the scope.", "input")); auto in_selected_rows = in_var->GetMutable(); auto value = in_selected_rows->mutable_value(); value->mutable_data(kDims, gpu_list_[input_scope_idx]); @@ -155,7 +160,9 @@ struct TestGatherOpHandle { } auto out_var = param_scopes_.at(output_scope_idx)->FindVar("out"); - PADDLE_ENFORCE_NOT_NULL(out_var); + PADDLE_ENFORCE_NOT_NULL( + out_var, platform::errors::NotFound( + "The variable '%s' is not found in the scope.", "out")); auto out_selected_rows = out_var->GetMutable(); auto in_var = param_scopes_.at(output_scope_idx)->FindVar("input"); @@ -173,9 +180,19 @@ struct TestGatherOpHandle { auto& out_select_rows = out_var->Get(); auto rt = out_select_rows.value(); - PADDLE_ENFORCE_EQ(out_select_rows.height(), height, "height is not equal."); + PADDLE_ENFORCE_EQ(out_select_rows.height(), height, + platform::errors::InvalidArgument( + "The height of SelectedRows is not equal to " + "the expected, expect %d, but got %d.", + height, out_select_rows.height())); + for (size_t k = 0; k < out_select_rows.rows().size(); ++k) { - PADDLE_ENFORCE_EQ(out_select_rows.rows()[k], rows[k % rows.size()]); + PADDLE_ENFORCE_EQ( + out_select_rows.rows()[k], rows[k % rows.size()], + platform::errors::InvalidArgument( + "The item at position %d of rows of 
SelectedRows is not equal to " + "the expected, expect %d, but got %d.", + k, rows[k % rows.size()], out_select_rows.rows()[k])); } f::Tensor result_tensor; @@ -207,6 +224,7 @@ TEST(GatherTester, TestGPUGatherTestSelectedRows) { test_op.TestGatherSelectedRows(input_scope_idx); } #endif + } // namespace details } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/details/nccl_op_handle.h b/paddle/fluid/framework/details/nccl_op_handle.h index 2d4d4122a3c..22a059773f5 100644 --- a/paddle/fluid/framework/details/nccl_op_handle.h +++ b/paddle/fluid/framework/details/nccl_op_handle.h @@ -46,14 +46,17 @@ class NCCLOpHandleBase : public OpHandleBase { } virtual ~NCCLOpHandleBase() { for (auto& ev : inter_events_) { - PADDLE_ENFORCE(cudaEventDestroy(ev.second)); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventDestroy(ev.second)); } for (auto& ev : exter_events_) { - PADDLE_ENFORCE(cudaEventDestroy(ev.second)); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventDestroy(ev.second)); } } void SetRunEnv(int run_order, bool use_hierarchical_allreduce) { - PADDLE_ENFORCE(run_order >= 0, "run_order must >= 0"); + PADDLE_ENFORCE_GE( + run_order, 0, + platform::errors::InvalidArgument( + "The argument run_order must be >= 0, but got %d.", run_order)); run_order_ = run_order; use_hierarchical_allreduce_ = use_hierarchical_allreduce; @@ -74,8 +77,11 @@ class NCCLOpHandleBase : public OpHandleBase { return; } - PADDLE_ENFORCE(places_.size() == 1, - "HierarchicalAllReduce run one proc with one card mode."); + PADDLE_ENFORCE_EQ(places_.size(), 1, + platform::errors::InvalidArgument( + "HierarchicalAllReduce can only run " + "one proccess with one card mode, but got %d cards.", + places_.size())); for (auto& p : places_) { auto ctxs = nccl_ctxs_->GetHierarchicalInterCtx(run_order); @@ -88,11 +94,11 @@ class NCCLOpHandleBase : public OpHandleBase { continue; } - PADDLE_ENFORCE(cudaSetDevice(dev_id)); - PADDLE_ENFORCE(cudaEventCreateWithFlags(&inter_events_[dev_id], - 
cudaEventDisableTiming)); - PADDLE_ENFORCE(cudaEventCreateWithFlags(&exter_events_[dev_id], - cudaEventDisableTiming)); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(dev_id)); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventCreateWithFlags( + &inter_events_[dev_id], cudaEventDisableTiming)); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventCreateWithFlags( + &exter_events_[dev_id], cudaEventDisableTiming)); VLOG(10) << "Create events on dev_id:" << dev_id << ", inter_event:" << &inter_events_[dev_id] << ", exter_event:" << &exter_events_[dev_id]; @@ -102,7 +108,10 @@ class NCCLOpHandleBase : public OpHandleBase { void FlatNCCLAllReduce(platform::Place place, const void* sendbuff, void* recvbuff, size_t count, ncclDataType_t datatype, ncclRedOp_t op) { - PADDLE_ENFORCE(run_order_ >= 0, "run_order must > 0"); + PADDLE_ENFORCE_GE( + run_order_, 0, + platform::errors::InvalidArgument( + "The argument run_order_ must be >= 0, but got %d.", run_order_)); auto flat_nccl_ctxs = nccl_ctxs_->GetFlatCtx(run_order_); int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device; auto& nccl_ctx = flat_nccl_ctxs->at(dev_id); @@ -113,14 +122,17 @@ class NCCLOpHandleBase : public OpHandleBase { << ", dev_id:" << dev_id << ", dtype:" << datatype << ", place:" << place; - PADDLE_ENFORCE(platform::dynload::ncclAllReduce( + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclAllReduce( sendbuff, recvbuff, count, datatype, op, comm, stream)); } void NCCLAllReduce(platform::Place place, const void* sendbuff, void* recvbuff, size_t count, ncclDataType_t datatype, ncclRedOp_t op) { - PADDLE_ENFORCE(run_order_ >= 0, "run_order must > 0"); + PADDLE_ENFORCE_GE( + run_order_, 0, + platform::errors::InvalidArgument( + "The argument run_order_ must be >= 0, but got %d.", run_order_)); if (!use_hierarchical_allreduce_) { FlatNCCLAllReduce(place, sendbuff, recvbuff, count, datatype, op); return; @@ -132,7 +144,10 @@ class NCCLOpHandleBase : public OpHandleBase { void HierarchicalAllReduce(platform::Place place, 
const void* sendbuff, void* recvbuff, size_t count, ncclDataType_t datatype, ncclRedOp_t op) { - PADDLE_ENFORCE(run_order_ >= 0, "run_order must > 0"); + PADDLE_ENFORCE_GE( + run_order_, 0, + platform::errors::InvalidArgument( + "The argument run_order_ must be >= 0, but got %d.", run_order_)); InterReduce(place, sendbuff, recvbuff, count, datatype, op); // When a trainer is not in exter allreduce ring // they need not to call this. @@ -157,14 +172,13 @@ class NCCLOpHandleBase : public OpHandleBase { << ", dtype:" << datatype << ", place:" << place << ", stream:" << stream; - PADDLE_ENFORCE(platform::dynload::ncclReduce( + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclReduce( sendbuff, recvbuff, count, datatype, ncclSum, 0, comm, stream)); cudaEventRecord(inter_events_.at(dev_id), stream); if (FLAGS_sync_nccl_allreduce) { - PADDLE_ENFORCE(cudaStreamSynchronize(stream), - "sync HierarchicalAllReduce inter stream error"); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream)); } } @@ -172,7 +186,9 @@ class NCCLOpHandleBase : public OpHandleBase { void* recvbuff, size_t count, ncclDataType_t datatype, ncclRedOp_t op) { auto nccl_ctxs = nccl_ctxs_->GetHierarchicalExterCtx(run_order_); - PADDLE_ENFORCE(nccl_ctxs_, "can't get exter %d nccl_ctxs", run_order_); + PADDLE_ENFORCE_NOT_NULL( + nccl_ctxs, platform::errors::NotFound( + "Can't get exter %d nccl contexts.", run_order_)); int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device; auto& nccl_ctx = nccl_ctxs->at(dev_id); auto stream = nccl_ctx.stream(); @@ -185,14 +201,13 @@ class NCCLOpHandleBase : public OpHandleBase { cudaStreamWaitEvent(stream, inter_events_.at(dev_id), 0); - PADDLE_ENFORCE(platform::dynload::ncclAllReduce( + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclAllReduce( sendbuff, recvbuff, count, datatype, op, comm, stream)); cudaEventRecord(exter_events_.at(dev_id), stream); if (FLAGS_sync_nccl_allreduce) { - PADDLE_ENFORCE(cudaStreamSynchronize(stream), - "sync 
HierarchicalAllReduce exter stream error"); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream)); } } @@ -210,8 +225,8 @@ class NCCLOpHandleBase : public OpHandleBase { << ", stream:" << stream; cudaStreamWaitEvent(stream, exter_events_.at(dev_id), 0); - PADDLE_ENFORCE(platform::dynload::ncclBcast(sendbuff, count, datatype, 0, - comm, stream)); + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclBcast( + sendbuff, count, datatype, 0, comm, stream)); } protected: diff --git a/paddle/fluid/framework/details/op_handle_base.cc b/paddle/fluid/framework/details/op_handle_base.cc index 459bcff5c0b..105c37192f5 100644 --- a/paddle/fluid/framework/details/op_handle_base.cc +++ b/paddle/fluid/framework/details/op_handle_base.cc @@ -47,8 +47,8 @@ void OpHandleBase::InitCUDA() { #ifdef PADDLE_WITH_CUDA for (auto &p : dev_ctxes_) { int dev_id = BOOST_GET_CONST(platform::CUDAPlace, p.first).device; - PADDLE_ENFORCE(cudaSetDevice(dev_id)); - PADDLE_ENFORCE( + PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(dev_id)); + PADDLE_ENFORCE_CUDA_SUCCESS( cudaEventCreateWithFlags(&events_[dev_id], cudaEventDisableTiming)); } if (IsMultiDeviceTransfer() && dev_ctxes_.size() > 0) { @@ -62,17 +62,22 @@ void OpHandleBase::InitCUDA() { } } } else { - PADDLE_ENFORCE_EQ(dev_ctxes_.size(), 1UL, - "%s should have only one dev_ctx.", Name()); + PADDLE_ENFORCE_EQ( + dev_ctxes_.size(), 1UL, + platform::errors::InvalidArgument( + "Operator %s should have only one dev_ctx, but got %d.", Name(), + dev_ctxes_.size())); auto &place = dev_ctxes_.begin()->first; int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device; for (auto &out_var : outputs_) { auto *out_var_handle = dynamic_cast(out_var); if (out_var_handle) { - PADDLE_ENFORCE(platform::is_same_place(place, out_var_handle->place()), - "The place of output(%s) is not consistent with the " - "place of current op(%s).", - out_var_handle->Name(), Name()); + PADDLE_ENFORCE_EQ( + platform::is_same_place(place, out_var_handle->place()), true, + 
platform::errors::InvalidArgument( + "The place of output(%s) is not consistent with the " + "place of current op(%s).", + out_var_handle->Name(), Name())); out_var_handle->SetGenerateEvent(events_.at(dev_id)); } } @@ -86,7 +91,10 @@ void OpHandleBase::Run(bool use_cuda) { InitCUDA(); } #else - PADDLE_ENFORCE(!use_cuda); + PADDLE_ENFORCE_EQ(use_cuda, false, + platform::errors::InvalidArgument( + "Argument use_cuda should be false when Paddle is not " + "compiled with CUDA.")); #endif // skip running current op, used with inplace_addto_op_pass @@ -100,17 +108,20 @@ void OpHandleBase::Run(bool use_cuda) { void OpHandleBase::RecordWaitEventOnCtx(platform::DeviceContext *waited_ctx) { #ifdef PADDLE_WITH_CUDA - PADDLE_ENFORCE_NOT_NULL(waited_ctx); + PADDLE_ENFORCE_NOT_NULL(waited_ctx, platform::errors::InvalidArgument( + "Argument waited_ctx is NULL.")); if (platform::is_cpu_place(waited_ctx->GetPlace()) || events_.empty()) { for (auto &dev_ctx : dev_ctxes_) { - PADDLE_ENFORCE_NOT_NULL(dev_ctx.second); + PADDLE_ENFORCE_NOT_NULL( + dev_ctx.second, + platform::errors::InvalidArgument("The device context is NULL.")); dev_ctx.second->Wait(); } } else { auto stream = static_cast(waited_ctx)->stream(); for (auto &ev : events_) { - PADDLE_ENFORCE(cudaStreamWaitEvent(stream, ev.second, 0)); + PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamWaitEvent(stream, ev.second, 0)); } } #else @@ -145,10 +156,11 @@ void OpHandleBase::WaitInputVarGenerated() { auto stream = static_cast(dev_ctxes_.at(place)) ->stream(); - PADDLE_ENFORCE( + PADDLE_ENFORCE_CUDA_SUCCESS( cudaStreamWaitEvent(stream, in_var_handle->GetEvent(), 0)); #else - PADDLE_THROW("Doesn't compile the GPU."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with CUDA.")); #endif } // There are nothing to do when the place is CPUPlace. 
@@ -169,10 +181,11 @@ void OpHandleBase::WaitInputVarGenerated(const platform::Place &place) { auto stream = static_cast( dev_ctxes_.at(in_var_handle->place())) ->stream(); - PADDLE_ENFORCE( + PADDLE_ENFORCE_CUDA_SUCCESS( cudaStreamWaitEvent(stream, in_var_handle->GetEvent(), 0)); #else - PADDLE_THROW("Doesn't compile the GPU."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with CUDA.")); #endif } // There are nothing to do when the place is CPUPlace. @@ -242,7 +255,9 @@ void OpHandleBase::SetLocalExecScopes( auto scopes = GetLocalScopes(); for (auto *scope : scopes) { auto iter = scope_map.find(scope); - PADDLE_ENFORCE(iter != scope_map.end(), "Local scope not found"); + PADDLE_ENFORCE_NE( + iter, scope_map.end(), + platform::errors::NotFound("Local scope not found in scope map.")); local_exec_scopes_.emplace_back(iter->second); } } diff --git a/paddle/fluid/framework/details/op_registry.h b/paddle/fluid/framework/details/op_registry.h index 1e608000e0a..453a25166b5 100644 --- a/paddle/fluid/framework/details/op_registry.h +++ b/paddle/fluid/framework/details/op_registry.h @@ -21,6 +21,7 @@ limitations under the License. 
*/ #include #include #include + #include "paddle/fluid/framework/grad_op_desc_maker.h" #include "paddle/fluid/framework/inplace_op_inference.h" #include "paddle/fluid/framework/no_need_buffer_vars_inference.h" @@ -186,19 +187,20 @@ struct OpInfoFiller { void operator()(const char* op_type, OpInfo* info) const { PADDLE_ENFORCE_EQ(info->proto_, nullptr, platform::errors::AlreadyExists( - "OpProto of %s has been registered", op_type)); + "OpProto of %s has been registered.", op_type)); PADDLE_ENFORCE_EQ(info->checker_, nullptr, platform::errors::AlreadyExists( - "OpAttrChecker of %s has been registered", op_type)); + "OpAttrChecker of %s has been registered.", op_type)); info->proto_ = new proto::OpProto; info->checker_ = new OpAttrChecker(); T maker; maker(info->proto_, info->checker_); info->proto_->set_type(op_type); - PADDLE_ENFORCE( - info->proto_->IsInitialized(), - "Fail to initialize %s's OpProto, because %s is not initialized", - op_type, info->proto_->InitializationErrorString()); + PADDLE_ENFORCE_EQ( + info->proto_->IsInitialized(), true, + platform::errors::PreconditionNotMet( + "Fail to initialize %s's OpProto, because %s is not initialized.", + op_type, info->proto_->InitializationErrorString())); } }; diff --git a/paddle/fluid/framework/details/reduce_and_gather.h b/paddle/fluid/framework/details/reduce_and_gather.h index 11c4621fde3..9ecb2d8dbdd 100644 --- a/paddle/fluid/framework/details/reduce_and_gather.h +++ b/paddle/fluid/framework/details/reduce_and_gather.h @@ -16,6 +16,7 @@ #include #include #include + #include "paddle/fluid/framework/details/reduce_and_gather.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/selected_rows.h" @@ -32,9 +33,13 @@ struct ReduceLoDTensor { template void apply() const { - PADDLE_ENFORCE(!src_tensors_.empty()); + PADDLE_ENFORCE_NE(src_tensors_.empty(), true, + platform::errors::InvalidArgument( + "The number of tensors to be reduced is 0.")); auto &t0 = *src_tensors_[0]; - 
PADDLE_ENFORCE_NE(t0.numel(), 0); + PADDLE_ENFORCE_NE(t0.numel(), 0, + platform::errors::InvalidArgument( + "The size of first tensor to be reduced is 0.")); dst_tensor_.Resize(t0.dims()); T *dst = dst_tensor_.mutable_data(platform::CPUPlace()); @@ -45,8 +50,19 @@ struct ReduceLoDTensor { continue; } - PADDLE_ENFORCE_EQ(t.dims(), t0.dims()); - PADDLE_ENFORCE_EQ(t.type(), t0.type()); + PADDLE_ENFORCE_EQ(t.dims(), t0.dims(), + platform::errors::InvalidArgument( + "The shape of tensors to be reduced must be " + "consistent. The shape of current tensor is %s, " + "but the shape of the first tensor is %s.", + t.dims(), t0.dims())); + + PADDLE_ENFORCE_EQ(t.type(), t0.type(), + platform::errors::InvalidArgument( + "The type of tensors to be reduced must be " + "consistent. The type of current tensor is %s, " + "but the type of the first tensor is %s.", + t.type(), t0.type())); std::transform(t.data(), t.data() + t.numel(), dst, dst, [](T a, T b) -> T { return a + b; }); } @@ -88,7 +104,9 @@ struct GatherLocalSelectedRowsFunctor { in_places_(in_places), out_place_(out_place), dst_selected_rows_(dst_selected_rows) { - PADDLE_ENFORCE_EQ(src_selected_rows.empty(), false); + PADDLE_ENFORCE_NE(src_selected_rows.empty(), true, + platform::errors::InvalidArgument( + "The number of selected_rows to be gathered is 0.")); std::vector out_rows; diff --git a/paddle/fluid/framework/details/reduce_op_handle.cc b/paddle/fluid/framework/details/reduce_op_handle.cc index d8f8cc994c0..d7f13f79f68 100644 --- a/paddle/fluid/framework/details/reduce_op_handle.cc +++ b/paddle/fluid/framework/details/reduce_op_handle.cc @@ -13,7 +13,9 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/reduce_op_handle.h" + #include + #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/reduce_and_gather.h" #include "paddle/fluid/framework/details/variable_visitor.h" @@ -116,8 +118,15 @@ void ReduceOpHandle::GatherSelectedRows( merged_dev_ctx->Wait(); scope->EraseVars(std::vector{gathered_var_name}); - PADDLE_ENFORCE(client->Gather(vars, &remote, *merged_dev_ctx, scope)); - PADDLE_ENFORCE(remote.size() == vars.size()); + PADDLE_ENFORCE_EQ( + client->Gather(vars, &remote, *merged_dev_ctx, scope), true, + platform::errors::PreconditionNotMet("Gather SelectedRows failed.")); + PADDLE_ENFORCE_EQ(remote.size(), vars.size(), + platform::errors::PreconditionNotMet( + "The number of remotes should be equal to the number " + "of variables to be gathered, but got the number of " + "remotes is %d and the number of variables is %d.", + remote.size(), vars.size())); // 4. merged local selected rows. std::vector all; @@ -151,14 +160,19 @@ void ReduceOpHandle::RunImpl() { PADDLE_ENFORCE_EQ( in_var_handles.size(), places_.size(), - "The number of output should equal to the number of places."); + platform::errors::InvalidArgument( + "The number of inputs should equal to the number of places, but got " + "the number of inputs is %d and the number of places is %d.", + in_var_handles.size(), places_.size())); VarHandle *out_var_handle; { auto out_var_handles = DynamicCast(outputs_); PADDLE_ENFORCE_EQ(out_var_handles.size(), 1UL, - "The number of output should be one."); + platform::errors::InvalidArgument( + "The number of output should be one, but got %d.", + out_var_handles.size())); out_var_handle = out_var_handles.front(); } @@ -168,7 +182,10 @@ void ReduceOpHandle::RunImpl() { auto pre_in_var = var_scopes.at(in_0_handle->scope_idx())->FindVar(in_0_handle->name()); - PADDLE_ENFORCE_NOT_NULL(pre_in_var); + + PADDLE_ENFORCE_NOT_NULL(pre_in_var, platform::errors::NotFound( + "Variable %s is not 
found in scope.", + in_0_handle->name())); // NOTE: The Places of all input tensor must be all on CPU or all on GPU. std::vector in_places; // used to get dev_ctx @@ -176,21 +193,29 @@ void ReduceOpHandle::RunImpl() { in_places.emplace_back(in_handle->place()); auto in_var = var_scopes.at(in_handle->scope_idx())->FindVar(in_handle->name()); - PADDLE_ENFORCE_NOT_NULL(in_var); + + PADDLE_ENFORCE_NOT_NULL( + in_var, platform::errors::NotFound("Variable %s is not found in scope.", + in_handle->name())); + VariableVisitor::EnforceShapeAndDTypeEQ(*pre_in_var, *in_var); } auto out_var = var_scopes.at(out_var_handle->scope_idx()) ->FindVar(out_var_handle->name()); - PADDLE_ENFORCE_NOT_NULL(out_var); + + PADDLE_ENFORCE_NOT_NULL( + out_var, platform::errors::NotFound("Variable %s is not found in scope.", + out_var_handle->name())); // NOTE: The tensors' Place of input and output must be all on GPU or all on // CPU. auto in_p = VariableVisitor::GetMutableTensor(pre_in_var).place(); platform::Place t_out_p; if (platform::is_gpu_place(in_p)) { - PADDLE_ENFORCE(platform::is_gpu_place(out_var_handle->place()), - "Places of input and output must be all on GPU."); + PADDLE_ENFORCE_EQ(platform::is_gpu_place(out_var_handle->place()), true, + platform::errors::PreconditionNotMet( + "Places of input and output must be all on GPU.")); t_out_p = out_var_handle->place(); } else { t_out_p = platform::CPUPlace(); @@ -229,7 +254,10 @@ void ReduceOpHandle::RunImpl() { in_selected_rows, in_places, dev_ctxes_, out_var_handle, t_out_p, out_var->GetMutable()); } else { - PADDLE_THROW("only support double or float when gather SelectedRows"); + PADDLE_THROW(platform::errors::Unimplemented( + "Only support double or float when gather SelectedRows, but got " + "%s.", + framework::DataTypeToString(in_selected_rows[0]->value().type()))); } #endif }); @@ -292,7 +320,7 @@ void ReduceOpHandle::RunImpl() { size_t numel = static_cast(lod_tensor.numel()); all_reduce_calls.emplace_back( [buffer, recvbuffer, 
type, numel, root_id, &nccl_ctx] { - PADDLE_ENFORCE(platform::dynload::ncclReduce( + PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclReduce( buffer, recvbuffer, numel, static_cast(type), ncclSum, root_id, nccl_ctx.comm_, nccl_ctx.stream())); }); @@ -306,10 +334,13 @@ void ReduceOpHandle::RunImpl() { } }); #else - PADDLE_THROW("CUDA is not enabled."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with CUDA.")); #endif } else { - PADDLE_THROW("Place should be CPUPlace or CUDAPlace."); + PADDLE_THROW(platform::errors::InvalidArgument( + "The place of tensor should be CPUPlace or CUDAPlace, but got %s.", + lod_tensors[0]->place())); } } } diff --git a/paddle/fluid/framework/details/reduce_op_handle_test.cc b/paddle/fluid/framework/details/reduce_op_handle_test.cc index d71251b76c7..ba03c3a267a 100644 --- a/paddle/fluid/framework/details/reduce_op_handle_test.cc +++ b/paddle/fluid/framework/details/reduce_op_handle_test.cc @@ -13,7 +13,9 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/reduce_op_handle.h" + #include + #include "gtest/gtest.h" #include "paddle/fluid/platform/device_context.h" @@ -69,7 +71,8 @@ struct TestReduceOpHandle { } nccl_ctxs_.reset(new platform::NCCLContextMap(gpu_list_)); #else - PADDLE_THROW("CUDA is not support."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with NCCL.")); #endif } else { int count = 8; @@ -103,7 +106,8 @@ struct TestReduceOpHandle { op_handle_.reset(new ReduceOpHandle(nodes.back().get(), local_scopes_, gpu_list_, nccl_ctxs_.get())); #else - PADDLE_THROW("CUDA is not support."); + PADDLE_THROW( + platform::errors::PreconditionNotMet("Not compiled with NCCL.")); #endif } else { #if defined(PADDLE_WITH_NCCL) @@ -164,7 +168,10 @@ struct TestReduceOpHandle { for (size_t input_scope_idx = 0; input_scope_idx < gpu_list_.size(); ++input_scope_idx) { auto in_var = param_scopes_[input_scope_idx]->FindVar("input"); - PADDLE_ENFORCE_NOT_NULL(in_var); + + PADDLE_ENFORCE_NOT_NULL( + in_var, platform::errors::NotFound( + "Variable %s is not found in scope.", "input")); auto in_selected_rows = in_var->GetMutable(); auto value = in_selected_rows->mutable_value(); value->mutable_data(kDims, gpu_list_[input_scope_idx]); @@ -178,7 +185,9 @@ struct TestReduceOpHandle { } auto out_var = param_scopes_[output_scope_idx]->FindVar("out"); - PADDLE_ENFORCE_NOT_NULL(out_var); + PADDLE_ENFORCE_NOT_NULL(out_var, + platform::errors::NotFound( + "Variable %s is not found in scope.", "out")); auto out_selected_rows = out_var->GetMutable(); auto in_var = param_scopes_[output_scope_idx]->FindVar("input"); @@ -196,9 +205,18 @@ struct TestReduceOpHandle { auto &out_select_rows = out_var->Get(); auto rt = out_select_rows.value(); - PADDLE_ENFORCE_EQ(out_select_rows.height(), height, "height is not equal."); + PADDLE_ENFORCE_EQ(out_select_rows.height(), height, + platform::errors::InvalidArgument( + "The height of SelectedRows is not equal to " + "the expected, expect %d, but 
got %d.", + height, out_select_rows.height())); for (size_t k = 0; k < out_select_rows.rows().size(); ++k) { - PADDLE_ENFORCE_EQ(out_select_rows.rows()[k], rows[k % rows.size()]); + PADDLE_ENFORCE_EQ( + out_select_rows.rows()[k], rows[k % rows.size()], + platform::errors::InvalidArgument( + "The item at position %d of rows of SelectedRows is not equal to " + "the expected, expect %d, but got %d.", + k, rows[k % rows.size()], out_select_rows.rows()[k])); } f::Tensor result_tensor; @@ -208,7 +226,7 @@ struct TestReduceOpHandle { for (int64_t j = 0; j < f::product(result_tensor.dims()); ++j) { ASSERT_NEAR(ct[j], send_vector[j % send_vector.size()], 1e-5); } - } + } // namespace details void TestReduceLodTensors(size_t output_scope_idx) { std::vector send_vector(static_cast(f::product(kDims))); @@ -220,7 +238,9 @@ struct TestReduceOpHandle { for (size_t input_scope_idx = 0; input_scope_idx < gpu_list_.size(); ++input_scope_idx) { auto in_var = param_scopes_[input_scope_idx]->FindVar("input"); - PADDLE_ENFORCE_NOT_NULL(in_var); + PADDLE_ENFORCE_NOT_NULL( + in_var, platform::errors::NotFound( + "Variable %s is not found in scope.", "input")); auto in_lod_tensor = in_var->GetMutable(); in_lod_tensor->mutable_data(kDims, gpu_list_[input_scope_idx]); in_lod_tensor->set_lod(lod); @@ -230,7 +250,9 @@ struct TestReduceOpHandle { } auto out_var = param_scopes_[output_scope_idx]->FindVar("out"); - PADDLE_ENFORCE_NOT_NULL(out_var); + PADDLE_ENFORCE_NOT_NULL(out_var, + platform::errors::NotFound( + "Variable %s is not found in scope.", "out")); auto out_lodtensor = out_var->GetMutable(); auto in_var = param_scopes_[output_scope_idx]->FindVar("input"); @@ -254,7 +276,7 @@ struct TestReduceOpHandle { ASSERT_NEAR(ct[j], send_vector[j] * gpu_list_.size(), 1e-5); } } -}; +}; // namespace details TEST(ReduceTester, TestCPUReduceTestSelectedRows) { TestReduceOpHandle test_op; diff --git a/paddle/fluid/framework/details/share_tensor_buffer_functor.cc 
b/paddle/fluid/framework/details/share_tensor_buffer_functor.cc index bf93d8f85b1..079e9abc895 100644 --- a/paddle/fluid/framework/details/share_tensor_buffer_functor.cc +++ b/paddle/fluid/framework/details/share_tensor_buffer_functor.cc @@ -111,13 +111,12 @@ void ShareTensorBufferFunctor::CallOnce() { auto *out_var = exec_scope_->FindVar(out_var_names_[i]); PADDLE_ENFORCE_NOT_NULL( in_var, platform::errors::NotFound( - "The input variable(%s)to be inplaced should not be NULL.", + "The variable(%s) to be inplaced is not found in scope.", in_var_infos_[i]->Name())); PADDLE_ENFORCE_NOT_NULL( - out_var, - platform::errors::NotFound( - "The output variable(%s) to be inplaced should not be NULL.", - out_var_names_[i])); + out_var, platform::errors::NotFound( + "The variable(%s) to be inplaced is not found in scope.", + out_var_names_[i])); PADDLE_ENFORCE_NE( in_var, out_var, platform::errors::PreconditionNotMet( diff --git a/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc b/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc index 3f9af1c3a12..37399e5ddc0 100644 --- a/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/fluid/framework/details/sparse_all_reduce_op_handle.h" + #include #include + #include "dgc/dgc.h" #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/reduce_and_gather.h" @@ -38,18 +40,23 @@ SparseAllReduceOpHandle::SparseAllReduceOpHandle( is_encoded_(is_encoded), nranks_(nranks) { // TODO(gongwb) :polish them! 
- PADDLE_ENFORCE_EQ(is_encoded, true); + PADDLE_ENFORCE_EQ(is_encoded, true, platform::errors::InvalidArgument( + "The argument is_encoded is false.")); VLOG(1) << "Use dgc allreduce mode" << ", nranks:" << nranks_; - PADDLE_ENFORCE_GT(local_scopes_.size(), 0); + PADDLE_ENFORCE_GT(local_scopes_.size(), 0, + platform::errors::PreconditionNotMet( + "The number of local scope should be > 0, but got %zu.", + local_scopes_.size())); auto nranks_name = g_dgc_nranks; for (size_t i = 0; i < local_scopes_.size(); ++i) { auto *local_scope = local_scopes_[i]; auto nranks_var = local_scope->FindVar(nranks_name); - if (nranks_var == nullptr) { - PADDLE_THROW("not find nranks_var:%s", nranks_name); - } + + PADDLE_ENFORCE_NOT_NULL( + nranks_var, platform::errors::NotFound( + "Variable %s is not found in scope.", nranks_name)); float *dgc_nranks = nranks_var->GetMutable()->data(); *dgc_nranks = nranks; @@ -64,10 +71,18 @@ void SparseAllReduceOpHandle::RunImplEncoded() { auto out_var_handles = DynamicCast(this->Outputs()); PADDLE_ENFORCE_EQ( in_var_handles.size(), places_.size(), - "The NoDummyInputSize should be equal to the number of places."); + platform::errors::PreconditionNotMet( + "The number of input variables should be equal to the number of " + "places, but got the number of input variables is %zu and the " + "number of places is %zu.", + in_var_handles.size(), places_.size())); PADDLE_ENFORCE_EQ( in_var_handles.size(), out_var_handles.size(), - "The NoDummyInputSize and NoDummyOutputSize should be equal."); + platform::errors::PreconditionNotMet( + "The number of input variables should be equal to the number of " + "output variables, but got the number of input variables is %zu and " + "the number of output variables is %zu.", + in_var_handles.size(), out_var_handles.size())); std::vector ins; std::vector gathers; @@ -80,14 +95,17 @@ void SparseAllReduceOpHandle::RunImplEncoded() { auto encode_var_name = original_name + g_dgc_encoded; auto *in_var = 
local_scope->FindVar(encode_var_name); - PADDLE_ENFORCE_NOT_NULL(in_var, "%s should not be null", encode_var_name); + PADDLE_ENFORCE_NOT_NULL( + in_var, platform::errors::NotFound("Variable %s is not found in scope.", + encode_var_name)); auto &in = in_var->Get(); ins.emplace_back(&in); auto gather_var_name = original_name + g_dgc_gather; auto *gather_var = local_scope->FindVar(gather_var_name); - PADDLE_ENFORCE_NOT_NULL(gather_var, "%s should not be null", - gather_var_name); + PADDLE_ENFORCE_NOT_NULL( + gather_var, platform::errors::NotFound( + "Variable %s is not found in scope.", gather_var_name)); auto *gather = gather_var->GetMutable(); gathers.emplace_back(gather); @@ -100,14 +118,26 @@ void SparseAllReduceOpHandle::RunImplEncoded() { } } - PADDLE_ENFORCE(platform::is_gpu_place(ins[0]->place())); - PADDLE_ENFORCE(platform::is_gpu_place(outs[0]->place())); - PADDLE_ENFORCE(nccl_ctxs_, "nccl_ctxs should not be nullptr."); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(ins[0]->place()), true, + platform::errors::InvalidArgument( + "The place of input variable should be CUDAPlace, but got %s.", + ins[0]->place())); + PADDLE_ENFORCE_EQ( + platform::is_gpu_place(outs[0]->place()), true, + platform::errors::InvalidArgument( + "The place of output variable should be CUDAPlace, but got %s.", + outs[0]->place())); + PADDLE_ENFORCE_NOT_NULL(nccl_ctxs_, platform::errors::PreconditionNotMet( + "The nccl contexts are NULL.")); int dtype = -1; size_t in_numel = 0; size_t out_numel = 0; - PADDLE_ENFORCE(nranks_ > 1); + PADDLE_ENFORCE_GT( + nranks_, 1, + platform::errors::PreconditionNotMet( + "The number of ranks should be > 1, but got %d.", nranks_)); std::vector> all_gather_calls; std::vector> sparse_reduce_calls; @@ -123,8 +153,16 @@ void SparseAllReduceOpHandle::RunImplEncoded() { dtype = (dtype == -1) ? platform::ToNCCLDataType(in.type()) : dtype; in_numel = (in_numel == 0) ? 
static_cast(in.numel()) : in_numel; - PADDLE_ENFORCE(in_numel % 2 == 0); - PADDLE_ENFORCE(in_numel / 2 == static_cast(k)); + PADDLE_ENFORCE_EQ(in_numel % 2, 0, + platform::errors::InvalidArgument( + "The number of elements of input variable should be " + "even, but got %zu.", + in_numel)); + PADDLE_ENFORCE_EQ(in_numel / 2, static_cast(k), + platform::errors::InvalidArgument( + "The number of elements of input variable should be " + "twice the value of k, but got %zu and k is %d.", + in_numel, k)); out_numel = (out_numel == 0) ? static_cast(out.numel()) : out_numel; int dev_id = BOOST_GET_CONST(platform::CUDAPlace, place).device; @@ -154,7 +192,8 @@ void SparseAllReduceOpHandle::RunImplEncoded() { PADDLE_ENFORCE_EQ(paddle::communication::dgc::sparseReduce( gather_buff, k, out_tensor_buf, static_cast(out_numel), nranks_, stream), - true); + true, platform::errors::Unavailable( + "Calling sparseReduce() failed.")); }); } @@ -187,11 +226,16 @@ void SparseAllReduceOpHandle::SparseAllReduceFunc( int SparseAllReduceOpHandle::GetKValue(const std::string &grad_name) { auto original_name = paddle::framework::GradOriginalVarName(grad_name); auto var_name = original_name + g_dgc_k; - PADDLE_ENFORCE(local_scopes_.size() > 0); + PADDLE_ENFORCE_GT(local_scopes_.size(), 0, + platform::errors::PreconditionNotMet( + "The number of local scope should be > 0, but got %zu.", + local_scopes_.size())); auto *scope = local_exec_scopes_[0]; auto var = scope->FindVar(var_name); - PADDLE_ENFORCE_NOT_NULL(var); + PADDLE_ENFORCE_NOT_NULL( + var, platform::errors::NotFound("Variable %s is not found in scope.", + var_name)); auto tensor = var->Get().data(); return *tensor; } @@ -202,15 +246,22 @@ bool SparseAllReduceOpHandle::IsEncoded() { } auto counter_name = g_dgc_counter_name; auto step_name = g_dgc_rampup_begin_step; - PADDLE_ENFORCE(local_scopes_.size() > 0); + + PADDLE_ENFORCE_GT(local_scopes_.size(), 0, + platform::errors::PreconditionNotMet( + "The number of local scope should be > 0, but got %zu.", + 
local_scopes_.size())); auto *local_scope = local_exec_scopes_[0]; auto count_var = local_scope->FindVar(counter_name); auto step_var = local_scope->FindVar(step_name); - if (count_var == nullptr || step_var == nullptr) { - PADDLE_THROW("not find count_var:%s or step_var:%s", counter_name, - step_var); - } + + PADDLE_ENFORCE_NOT_NULL( + count_var, platform::errors::NotFound( + "Variable %s is not found in scope.", counter_name)); + PADDLE_ENFORCE_NOT_NULL( + step_var, platform::errors::NotFound("Variable %s is not found in scope.", + step_var)); float count = *count_var->Get().data(); float step = *step_var->Get().data(); -- GitLab From d014e29fc611392a015dc54b20a0d347e92e65f7 Mon Sep 17 00:00:00 2001 From: Chengmo Date: Sun, 27 Sep 2020 13:32:32 +0800 Subject: [PATCH 115/117] fix error message (#27318) * fix sgd/momentum/dpsgd/rmsprop error message --- paddle/fluid/operators/optimizers/dpsgd_op.cc | 35 +++++--- paddle/fluid/operators/optimizers/dpsgd_op.h | 18 ++-- .../fluid/operators/optimizers/momentum_op.h | 79 +++++++++++------ .../fluid/operators/optimizers/rmsprop_op.cc | 88 ++++++++++++------- .../fluid/operators/optimizers/rmsprop_op.h | 37 +++++--- paddle/fluid/operators/optimizers/sgd_op.cc | 34 ++++--- paddle/fluid/operators/optimizers/sgd_op.cu | 36 ++++++-- paddle/fluid/operators/optimizers/sgd_op.h | 79 +++++++++++++---- 8 files changed, 277 insertions(+), 129 deletions(-) diff --git a/paddle/fluid/operators/optimizers/dpsgd_op.cc b/paddle/fluid/operators/optimizers/dpsgd_op.cc index 3bcf17fc7b3..bce00933420 100644 --- a/paddle/fluid/operators/optimizers/dpsgd_op.cc +++ b/paddle/fluid/operators/optimizers/dpsgd_op.cc @@ -24,32 +24,45 @@ class DpsgdOp : public framework::OperatorWithKernel { void InferShape(framework::InferShapeContext *ctx) const override { PADDLE_ENFORCE_EQ(ctx->HasInput("Param"), true, - "Input(Param) of DpsgdOp should not be null."); + platform::errors::NotFound( + "Input(Param) of DpsgdOp should not be null.")); 
PADDLE_ENFORCE_EQ(ctx->HasInput("Grad"), true, - "Input(Grad) of DpsgdOp should not be null."); - PADDLE_ENFORCE_EQ(ctx->HasInput("LearningRate"), true, - "Input(LearningRate) of DpsgdOp should not be null."); + platform::errors::NotFound( + "Input(Grad) of DpsgdOp should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("LearningRate"), true, + platform::errors::NotFound( + "Input(LearningRate) of DpsgdOp should not be null.")); PADDLE_ENFORCE_EQ( ctx->GetInputsVarType("Param").front(), framework::proto::VarType::LOD_TENSOR, - "The input var's type should be LoDTensor, but the received is %s", - ctx->Inputs("Param").front(), ctx->GetInputsVarType("Param").front()); + platform::errors::InvalidArgument( + "The input var's type should be LoDTensor, but the received is %s", + ctx->GetInputsVarType("Param").front())); PADDLE_ENFORCE_EQ( ctx->GetInputsVarType("Grad").front(), framework::proto::VarType::LOD_TENSOR, - "The input var's type should be LoDTensor, but the received is %s", - ctx->Inputs("Grad").front(), ctx->GetInputsVarType("Grad").front()); + platform::errors::InvalidArgument( + "The input var's type should be LoDTensor, but the received is %s", + ctx->GetInputsVarType("Grad").front())); PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"), true, - "Output(ParamOut) of DpsgdOp should not be null."); + platform::errors::NotFound( + "Output(ParamOut) of DpsgdOp should not be null.")); auto lr_dims = ctx->GetInputDim("LearningRate"); PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, - "Learning rate should have 1 dimension"); + platform::errors::InvalidArgument( + "Learning rate should have 1 dimension. But Received " + "LearningRate's dims [%s].", + framework::product(lr_dims))); auto param_dims = ctx->GetInputDim("Param"); PADDLE_ENFORCE_EQ( param_dims, ctx->GetInputDim("Grad"), - "Param and Grad input of DpsgdOp should have same dimension"); + platform::errors::InvalidArgument( + "Param and Grad input of DpsgdOp should have same dimension. 
But " + "received Para's dim [%s] and Grad's dim [%s].", + param_dims, ctx->GetInputDim("Grad"))); ctx->SetOutputDim("ParamOut", param_dims); } diff --git a/paddle/fluid/operators/optimizers/dpsgd_op.h b/paddle/fluid/operators/optimizers/dpsgd_op.h index 4eb52feb851..e52a1dd9db1 100644 --- a/paddle/fluid/operators/optimizers/dpsgd_op.h +++ b/paddle/fluid/operators/optimizers/dpsgd_op.h @@ -28,17 +28,19 @@ class DpsgdOpKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext &ctx) const override { const auto *param_var = ctx.InputVar("Param"); PADDLE_ENFORCE_EQ(param_var->IsType(), true, - "The Var(%s)'s type should be LoDTensor, " - "but the received is %s", - ctx.InputNames("Param").front(), - framework::ToTypeName(param_var->Type())); + platform::errors::InvalidArgument( + "The Var(%s)'s type should be LoDTensor, " + "but the received is %s", + ctx.InputNames("Param").front(), + framework::ToTypeName(param_var->Type()))); const auto *grad_var = ctx.InputVar("Grad"); PADDLE_ENFORCE_EQ(grad_var->IsType(), true, - "The Var(%s)'s type should be LoDTensor, " - "but the received is %s", - ctx.InputNames("Grad").front(), - framework::ToTypeName(grad_var->Type())); + platform::errors::InvalidArgument( + "The Var(%s)'s type should be LoDTensor, " + "but the received is %s", + ctx.InputNames("Grad").front(), + framework::ToTypeName(grad_var->Type()))); const auto *learning_rate = ctx.Input("LearningRate"); diff --git a/paddle/fluid/operators/optimizers/momentum_op.h b/paddle/fluid/operators/optimizers/momentum_op.h index 10b72524efd..083bd91abfc 100644 --- a/paddle/fluid/operators/optimizers/momentum_op.h +++ b/paddle/fluid/operators/optimizers/momentum_op.h @@ -40,43 +40,62 @@ class MomentumOp : public framework::OperatorWithKernel { protected: void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Param"), - "Input(param) of Momentum should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Grad"), - 
"Input(grad) of Momentum should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Velocity"), - "Input(velocity) of Momentum should not be null."); - PADDLE_ENFORCE(ctx->HasInput("LearningRate"), - "Input(LearningRate) of Momentum should not be null."); - PADDLE_ENFORCE( - ctx->GetInputsVarType("Param").front() == - framework::proto::VarType::LOD_TENSOR, - "The input var's type should be LoDTensor, but the received is %s", - ctx->Inputs("Param").front(), ctx->GetInputsVarType("Param").front()); - - PADDLE_ENFORCE(ctx->HasOutput("ParamOut"), - "Output(ParamOut) of Momentum should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("VelocityOut"), - "Output(VelocityOut) of Momentum should not be null."); + PADDLE_ENFORCE_EQ(ctx->HasInput("Param"), true, + platform::errors::NotFound( + "Input(param) of Momentum should not be null.")); + PADDLE_ENFORCE_EQ(ctx->HasInput("Grad"), true, + platform::errors::NotFound( + "Input(grad) of Momentum should not be null.")); + PADDLE_ENFORCE_EQ(ctx->HasInput("Velocity"), true, + platform::errors::NotFound( + "Input(velocity) of Momentum should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("LearningRate"), true, + platform::errors::NotFound( + "Input(LearningRate) of Momentum should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->GetInputsVarType("Param").front(), + framework::proto::VarType::LOD_TENSOR, + platform::errors::InvalidArgument( + "The input var's type should be LoDTensor, but the received is %s", + ctx->GetInputsVarType("Param").front())); + + PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"), true, + platform::errors::NotFound( + "Output(ParamOut) of Momentum should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasOutput("VelocityOut"), true, + platform::errors::NotFound( + "Output(VelocityOut) of Momentum should not be null.")); auto lr_dims = ctx->GetInputDim("LearningRate"); PADDLE_ENFORCE_NE(framework::product(lr_dims), 0, - "Maybe the Input variable LearningRate has not " - "been initialized. 
You may need to confirm " - "if you put exe.run(startup_program) " - "after optimizer.minimize function."); + platform::errors::InvalidArgument( + "Maybe the Input variable LearningRate has not " + "been initialized. You may need to confirm " + "if you put exe.run(startup_program) " + "after optimizer.minimize function.")); PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, - "Learning_rate should be a scalar"); + platform::errors::InvalidArgument( + "Learning_rate should be a scalar. But Received " + "LearningRate's dim [%s]", + framework::product(lr_dims))); auto param_dim = ctx->GetInputDim("Param"); if (ctx->GetInputsVarType("Grad")[0] == framework::proto::VarType::LOD_TENSOR) { PADDLE_ENFORCE_EQ( param_dim, ctx->GetInputDim("Grad"), - "Param and Grad input of MomentumOp should have the same dimension."); + platform::errors::InvalidArgument( + "Param and Grad input of MomentumOp should have the same " + "dimension. But received Param's dim [%s] and Grad's dim [%s].", + param_dim, ctx->GetInputDim("Grad"))); PADDLE_ENFORCE_EQ( param_dim, ctx->GetInputDim("Velocity"), - "Param and Velocity of MomentumOp should have the same dimension."); + platform::errors::InvalidArgument( + "Param and Velocity of MomentumOp should have the same " + "dimension. But received Param's dim [%s] and Velocity [%s].", + param_dim, ctx->GetInputDim("Velocity"))); } ctx->SetOutputDim("ParamOut", param_dim); @@ -398,10 +417,12 @@ class MomentumOpKernel : public framework::OpKernel { for_range(functor); } } else { - PADDLE_THROW( - string::Sprintf("MomentumOp only supports LoDTensor or SelectedRows " - "gradient, but the received Variable Type is %s", - framework::ToTypeName(grad_var->Type()))); + PADDLE_ENFORCE_EQ(false, true, + platform::errors::PermissionDenied( + "Unsupported Variable Type of Grad " + "in MomentumOp. 
Excepted LodTensor " + "or SelectedRows, But received [%s]", + paddle::framework::ToTypeName(grad_var->Type()))); } } }; diff --git a/paddle/fluid/operators/optimizers/rmsprop_op.cc b/paddle/fluid/operators/optimizers/rmsprop_op.cc index eeee008cdc5..9e7960c237f 100644 --- a/paddle/fluid/operators/optimizers/rmsprop_op.cc +++ b/paddle/fluid/operators/optimizers/rmsprop_op.cc @@ -22,47 +22,75 @@ class RmspropOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Param"), - "Input(Param) of RmspropOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("MeanSquare"), - "Input(MeanSquare) of RmspropOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("LearningRate"), - "Input(LearningRate) of RmspropOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Grad"), - "Input(Grad) of RmspropOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Moment"), - "Input(Moment) of RmspropOp should not be null."); - PADDLE_ENFORCE( - ctx->GetInputsVarType("Param").front() == - framework::proto::VarType::LOD_TENSOR, - "The input var's type should be LoDTensor, but the received is %s", - ctx->Inputs("Param").front(), ctx->GetInputsVarType("Param").front()); - - PADDLE_ENFORCE(ctx->HasOutput("ParamOut"), - "Output(param_out) of RmspropOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("MomentOut"), - "Output(MomentOut) of RmspropOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("MeanSquareOut"), - "Output(MeanSquareOut) of RmspropOp should not be null."); + PADDLE_ENFORCE_EQ(ctx->HasInput("Param"), true, + platform::errors::NotFound( + "Input(Param) of RmspropOp should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("MeanSquare"), true, + platform::errors::NotFound( + "Input(MeanSquare) of RmspropOp should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("LearningRate"), true, + 
platform::errors::NotFound( + "Input(LearningRate) of RmspropOp should not be null.")); + PADDLE_ENFORCE_EQ(ctx->HasInput("Grad"), true, + platform::errors::NotFound( + "Input(Grad) of RmspropOp should not be null.")); + PADDLE_ENFORCE_EQ(ctx->HasInput("Moment"), true, + platform::errors::NotFound( + "Input(Moment) of RmspropOp should not be null.")); + PADDLE_ENFORCE_EQ(ctx->GetInputsVarType("Param").front(), + framework::proto::VarType::LOD_TENSOR, + platform::errors::InvalidArgument( + "The input var's type in RmspropOp should be " + "LoDTensor, but the received is %s", + ctx->GetInputsVarType("Param").front())); + + PADDLE_ENFORCE_EQ( + ctx->HasOutput("ParamOut"), true, + platform::errors::NotFound( + "Output(param_out) of RmspropOp should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasOutput("MomentOut"), true, + platform::errors::NotFound( + "Output(MomentOut) of RmspropOp should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasOutput("MeanSquareOut"), true, + platform::errors::NotFound( + "Output(MeanSquareOut) of RmspropOp should not be null.")); if (ctx->Attrs().Get("centered")) { - PADDLE_ENFORCE(ctx->HasOutput("MeanGradOut"), - "Output(MeanGradOut) of RmspropOp should not be null."); + PADDLE_ENFORCE_EQ( + ctx->HasOutput("MeanGradOut"), true, + platform::errors::NotFound( + "Output(MeanGradOut) of RmspropOp should not be null.")); } auto param_dim = ctx->GetInputDim("Param"); PADDLE_ENFORCE_EQ( param_dim, ctx->GetInputDim("Grad"), - "Param and grad input of RmspropOp should have the same dimension."); + platform::errors::InvalidArgument( + "Param and grad input of RmspropOp should have the same dimension. " + "But received Param's dim [%s] and Grad's dim [%s].", + param_dim, ctx->GetInputDim("Grad"))); PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("Moment"), - "Param and Momentum input of RmspropOp " - "should have the same dimension."); + platform::errors::InvalidArgument( + "Param and Momentum input of RmspropOp " + "should have the same dimension. 
But received " + "Param's dim [%s] and Moment [%s]", + param_dim, ctx->GetInputDim("Moment"))); PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("MeanSquare"), - "Param and Momentum input of RmspropOp " - "should have the same dimension."); + platform::errors::InvalidArgument( + "Param and Momentum input of RmspropOp " + "should have the same dimension. But received " + "Param's dim [%s] and MeanSquare [%s]", + param_dim, ctx->GetInputDim("MeanSquare"))); auto lr_dim = ctx->GetInputDim("LearningRate"); PADDLE_ENFORCE_EQ(framework::product(lr_dim), 1, - "Learning Rate should be a scalar."); + platform::errors::InvalidArgument( + "Learning Rate of RmspropOp should be a scalar. But " + "received LearningRate's dim [%s]", + framework::product(lr_dim))); ctx->SetOutputDim("ParamOut", param_dim); ctx->SetOutputDim("MomentOut", param_dim); diff --git a/paddle/fluid/operators/optimizers/rmsprop_op.h b/paddle/fluid/operators/optimizers/rmsprop_op.h index 4550052b2d6..1ec712a1431 100644 --- a/paddle/fluid/operators/optimizers/rmsprop_op.h +++ b/paddle/fluid/operators/optimizers/rmsprop_op.h @@ -148,11 +148,15 @@ class RmspropOpKernel : public framework::OpKernel { auto &mom_tensor = *ctx.Input("Moment"); PADDLE_ENFORCE_EQ(&p_tensor, param_out, - "Param and ParamOut must be the same Tensor"); + platform::errors::InvalidArgument( + "Param and ParamOut must be the same Tensor")); PADDLE_ENFORCE_EQ(&mom_tensor, moment_out, - "Moment and MomentOut must be the same Tensor"); - PADDLE_ENFORCE_EQ(&ms_tensor, mean_square_out, - "MeanSquare and MeanSquareOut must be the same Tensor"); + platform::errors::InvalidArgument( + "Moment and MomentOut must be the same Tensor")); + PADDLE_ENFORCE_EQ( + &ms_tensor, mean_square_out, + platform::errors::InvalidArgument( + "MeanSquare and MeanSquareOut must be the same Tensor")); auto &dev_ctx = ctx.template device_context(); size_t limit = static_cast(ms_tensor.numel()); @@ -179,8 +183,10 @@ class RmspropOpKernel : public framework::OpKernel { auto 
&mg_tensor = *ctx.Input("MeanGrad"); auto mg = EigenVector::Flatten(mg_tensor); auto *mean_grad_out = ctx.Output("MeanGradOut"); - PADDLE_ENFORCE_EQ(&mg_tensor, mean_grad_out, - "MeanGrad and MeanGradOut must be the same Tensor"); + PADDLE_ENFORCE_EQ( + &mg_tensor, mean_grad_out, + platform::errors::InvalidArgument( + "MeanGrad and MeanGradOut must be the same Tensor")); auto mg_out = EigenVector::Flatten(*mean_grad_out); mg_out.device(place) = rho * mg + (1 - rho) * g; @@ -198,8 +204,10 @@ class RmspropOpKernel : public framework::OpKernel { if (centered) { auto &mg_tensor = *ctx.Input("MeanGrad"); auto *mean_grad_out = ctx.Output("MeanGradOut"); - PADDLE_ENFORCE_EQ(&mg_tensor, mean_grad_out, - "MeanGrad and MeanGradOut must be the same Tensor"); + PADDLE_ENFORCE_EQ( + &mg_tensor, mean_grad_out, + platform::errors::InvalidArgument( + "MeanGrad and MeanGradOut must be the same Tensor")); for_range(CenteredRmspropFunctor>( param_out->mutable_data(ctx.GetPlace()), mean_square_out->mutable_data(ctx.GetPlace()), @@ -233,8 +241,10 @@ class RmspropOpKernel : public framework::OpKernel { if (centered) { auto &mg_tensor = *ctx.Input("MeanGrad"); auto *mean_grad_out = ctx.Output("MeanGradOut"); - PADDLE_ENFORCE_EQ(&mg_tensor, mean_grad_out, - "MeanGrad and MeanGradOut must be the same Tensor"); + PADDLE_ENFORCE_EQ( + &mg_tensor, mean_grad_out, + platform::errors::InvalidArgument( + "MeanGrad and MeanGradOut must be the same Tensor")); for_range(CenteredRmspropFunctor>( param_out->mutable_data(ctx.GetPlace()), mean_square_out->mutable_data(ctx.GetPlace()), @@ -249,7 +259,12 @@ class RmspropOpKernel : public framework::OpKernel { rho, epsilon, momentum, grad_func)); } } else { - PADDLE_THROW("RMSProp only supports LoDTensor or SelectedRows gradient"); + PADDLE_ENFORCE_EQ(false, true, + platform::errors::PermissionDenied( + "Unsupported Variable Type of Grad " + "in RmspropOp. 
Excepted LodTensor " + "or SelectedRows, But received [%s]", + paddle::framework::ToTypeName(grad_var->Type()))); } } }; diff --git a/paddle/fluid/operators/optimizers/sgd_op.cc b/paddle/fluid/operators/optimizers/sgd_op.cc index aeff8da70b9..569dbcd6a3e 100644 --- a/paddle/fluid/operators/optimizers/sgd_op.cc +++ b/paddle/fluid/operators/optimizers/sgd_op.cc @@ -22,23 +22,31 @@ class SGDOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("Param"), - "Input(Param) of SGDOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("Grad"), - "Input(Grad) of SGDOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("LearningRate"), - "Input(LearningRate) of SGDOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("ParamOut"), - "Output(ParamOut) of SGDOp should not be null."); + PADDLE_ENFORCE_EQ(ctx->HasInput("Param"), true, + platform::errors::NotFound( + "Input(Param) of SGDOp should not be null.")); + PADDLE_ENFORCE_EQ( + ctx->HasInput("Grad"), true, + platform::errors::NotFound("Input(Grad) of SGDOp should not be null.")); + PADDLE_ENFORCE_EQ(ctx->HasInput("LearningRate"), true, + platform::errors::NotFound( + "Input(LearningRate) of SGDOp should not be null.")); + PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"), true, + platform::errors::NotFound( + "Output(ParamOut) of SGDOp should not be null.")); auto lr_dims = ctx->GetInputDim("LearningRate"); PADDLE_ENFORCE_NE(framework::product(lr_dims), 0, - "Maybe the Input variable LearningRate has not " - "been initialized. You may need to confirm " - "if you put exe.run(startup_program) " - "after optimizer.minimize function."); + platform::errors::NotFound( + "Maybe the Input variable LearningRate has not " + "been initialized. 
You may need to confirm " + "if you put exe.run(startup_program) " + "after optimizer.minimize function.")); PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1, - "Learning rate should have 1 element"); + platform::errors::InvalidArgument( + "Learning rate should have 1 element. But received " + "LearningRate dims [%s]", + framework::product(lr_dims))); auto param_dim = ctx->GetInputDim("Param"); if (ctx->GetInputsVarType("Grad")[0] == framework::proto::VarType::LOD_TENSOR) { diff --git a/paddle/fluid/operators/optimizers/sgd_op.cu b/paddle/fluid/operators/optimizers/sgd_op.cu index b70f24e0e5e..a5d9ad271f2 100644 --- a/paddle/fluid/operators/optimizers/sgd_op.cu +++ b/paddle/fluid/operators/optimizers/sgd_op.cu @@ -57,11 +57,12 @@ class SGDOpKernel public: void Compute(const framework::ExecutionContext& ctx) const override { const auto* param_var = ctx.InputVar("Param"); - PADDLE_ENFORCE(param_var->IsType(), - "The Var(%s)'s type should be LoDTensor, " - "but the received is %s", - ctx.InputNames("Param").front(), - framework::ToTypeName(param_var->Type())); + PADDLE_ENFORCE_EQ(param_var->IsType(), true, + platform::errors::InvalidArgument( + "The Var(%s)'s type should be LoDTensor, " + "but the received is %s", + ctx.InputNames("Param").front(), + paddle::framework::ToTypeName(param_var->Type()))); auto* param = ctx.Input("Param"); auto* param_out = ctx.Output("ParamOut"); @@ -91,18 +92,30 @@ class SGDOpKernel // TODO(qijun): In Sparse SGD operator, in-place update is enforced. // This manual optimization brings difficulty to track data dependency. // It's better to find a more elegant solution. 
- PADDLE_ENFORCE_EQ(param, param_out); + PADDLE_ENFORCE_EQ( + param, param_out, + platform::errors::InvalidArgument( + "The input tensor Param of SgdOp should be equal with ParamOut " + "if variable's type is SelectedRows.")); auto* grad = ctx.Input("Grad"); auto in_height = grad->height(); auto out_dims = param_out->dims(); - PADDLE_ENFORCE_EQ(in_height, out_dims[0]); + PADDLE_ENFORCE_EQ(in_height, out_dims[0], + platform::errors::InvalidArgument( + "The input tensor Grad's height of SgdOp should be " + "equal with ParamOut's dims. But received Grad's " + "height [%s] and ParamOut's dims [%s]", + in_height, out_dims[0])); auto& in_value = grad->value(); auto& in_rows = grad->rows(); int64_t in_row_numel = in_value.numel() / in_rows.size(); - PADDLE_ENFORCE_EQ(in_row_numel, param_out->numel() / in_height); + PADDLE_ENFORCE_EQ(in_row_numel, param_out->numel() / in_height, + platform::errors::InvalidArgument( + "The in_row_numel of SgdOp should be equal with " + "param_out's numel / in_height.")); auto* in_data = in_value.data(); auto* out_data = param_out->data(); @@ -118,7 +131,12 @@ class SGDOpKernel out_data, in_row_numel, in_rows.size()); } else { - PADDLE_THROW("Unsupported Variable Type of Grad"); + PADDLE_ENFORCE_EQ(false, true, + platform::errors::PermissionDenied( + "Unsupported Variable Type of Grad " + "in SgdOp. 
Excepted LodTensor or " + "SelectedRows, But received [%s]", + paddle::framework::ToTypeName(grad_var->Type()))); } } }; diff --git a/paddle/fluid/operators/optimizers/sgd_op.h b/paddle/fluid/operators/optimizers/sgd_op.h index 539d774a395..1aaf95efc32 100644 --- a/paddle/fluid/operators/optimizers/sgd_op.h +++ b/paddle/fluid/operators/optimizers/sgd_op.h @@ -44,8 +44,20 @@ class SGDOpKernel if (grad_var->IsType()) { const auto *grad = ctx.Input("Grad"); auto sz = param_out->numel(); - PADDLE_ENFORCE_EQ(param->numel(), sz); - PADDLE_ENFORCE_EQ(grad->numel(), sz); + PADDLE_ENFORCE_EQ(param->numel(), sz, + platform::errors::InvalidArgument( + "The input tensor Param's numel of SgdOp " + "should be equal with ParamOut's numel. " + "But received Param's " + "numel = [%s], ParamOut's numel = [%s]", + param->numel(), sz)); + PADDLE_ENFORCE_EQ(grad->numel(), sz, + platform::errors::InvalidArgument( + "The input tensor Grad's numel of SgdOp " + "should be equal with ParamOut's numel. " + "But received Grad's " + "numel = [%s], ParamOut's numel = [%s]", + grad->numel(), sz)); jit::sgd_attr_t attr(1, sz, 1, sz, 1); const T *lr = learning_rate->data(); @@ -62,7 +74,11 @@ class SGDOpKernel // TODO(qijun): In Sparse SGD operator, in-place update is enforced. // This manual optimization brings difficulty to track data dependency. // It's better to find a more elegant solution. - PADDLE_ENFORCE_EQ(param, param_out); + PADDLE_ENFORCE_EQ(param, param_out, + platform::errors::InvalidArgument( + "The input tensor Param of SgdOp " + "should be equal with ParamOut if variable's " + "type is SelectedRows. ")); const auto *grad = ctx.Input("Grad"); auto &grad_rows = grad->rows(); @@ -73,7 +89,13 @@ class SGDOpKernel } auto out_dims = param_out->dims(); - PADDLE_ENFORCE_EQ(grad->height(), out_dims[0]); + PADDLE_ENFORCE_EQ( + grad->height(), out_dims[0], + platform::errors::InvalidArgument( + "The input tensor Grad's height of SgdOp " + "should be equal with ParamOut's dims. 
But received Grad's " + "height [%s] and ParamOut's dims [%s]", + grad->height(), out_dims[0])); auto &grad_value = grad->value(); const T *param_data = param->data(); const T *grad_data = grad_value.data(); @@ -87,19 +109,31 @@ class SGDOpKernel attr.grad_height = grad_rows.size(); // note: it is not grad->height() attr.grad_width = grad_value.numel() / attr.grad_height; attr.selected_rows_size = grad_rows.size(); - PADDLE_ENFORCE_EQ(attr.grad_width, attr.param_width); + PADDLE_ENFORCE_EQ( + attr.grad_width, attr.param_width, + platform::errors::InvalidArgument( + "The grad_value's numel of SgdOp " + "should be equal with param_out's numel. But received " + "grad_value's numel [%s] and param_out's numel [%s]", + attr.grad_width, attr.param_width)); auto sgd = jit::KernelFuncs, platform::CPUPlace>::Cache().At( attr); sgd(lr, param_data, grad_data, rows_data, out_data, &attr); } else { - PADDLE_THROW("Unsupported Variable Type of Grad"); + PADDLE_ENFORCE_EQ( + false, true, + platform::errors::PermissionDenied( + "Unsupported Variable Type of Grad in SgdOp. 
Excepted " + "LodTensor or SelectedRows, But received [%s]", + paddle::framework::ToTypeName(grad_var->Type()))); } } else if (param_var->IsType()) { - PADDLE_ENFORCE(grad_var->IsType(), - "when param " - "is SelectedRows, gradient should also be SelectedRows"); + PADDLE_ENFORCE_EQ(grad_var->IsType(), true, + platform::errors::InvalidArgument( + "when param is SelectedRows, " + "gradient should also be SelectedRows")); const auto ¶m = param_var->Get(); auto *param_out = ctx.Output("ParamOut"); const auto &grad = grad_var->Get(); @@ -112,27 +146,36 @@ class SGDOpKernel auto param_row_width = param.value().dims()[1]; auto grad_row_width = grad.value().dims()[1]; - VLOG(4) << " param rows: " << param.rows().size() - << " param memory rows: " << param.value().dims()[0] - << " grad rows: " << grad.rows().size() - << " grad memory rows: " << grad.value().dims()[0]; - PADDLE_ENFORCE_EQ(param_row_width, grad_row_width, - "param_row should have the same size with grad_row"); + PADDLE_ENFORCE_EQ( + param_row_width, grad_row_width, + platform::errors::InvalidArgument( + "The param_row in SgdOP should have the same size with grad_row. " + "But received param_row's width is [%s], and grad_row's width is " + "[%s]", + param_row_width, grad_row_width)); const auto *lr = learning_rate->data(); const auto *grad_data = grad.value().data(); auto *out_data = param_out->mutable_value()->data(); for (size_t i = 0; i < grad.rows().size(); i++) { int64_t id_index = param_out->AutoGrownIndex(grad.rows()[i], false); - PADDLE_ENFORCE_GE(id_index, static_cast(0), - "id should be in the table"); + PADDLE_ENFORCE_GE( + id_index, static_cast(0), + platform::errors::InvalidArgument( + "The id in SgdOp should be >= 0. 
But recevied id_index is [%s]", + id_index)); for (int64_t j = 0; j < grad_row_width; j++) { out_data[id_index * grad_row_width + j] -= lr[0] * grad_data[i * grad_row_width + j]; } } } else { - PADDLE_THROW("Unsupported Variable Type of Parameter"); + PADDLE_ENFORCE_EQ( + false, true, + platform::errors::PermissionDenied( + "Unsupported Variable Type of Parameter in SgdOp. Excepted " + "LodTensor or SelectedRows, But received [%s]", + paddle::framework::ToTypeName(param_var->Type()))); } } }; -- GitLab From d37b3774fd4a9b544422ba5e8e335f879744c440 Mon Sep 17 00:00:00 2001 From: Jack Zhou <136876878@qq.com> Date: Sun, 27 Sep 2020 13:54:48 +0800 Subject: [PATCH 116/117] register log double grad kernel for cpu and cuda register log double grad kernel for cpu and cuda --- paddle/fluid/operators/activation_op.cc | 51 +++++++++++++++++++ paddle/fluid/operators/activation_op.cu | 12 +++++ paddle/fluid/operators/activation_op.h | 36 ++++++++++++- .../unittests/test_activation_nn_grad.py | 24 +++++++++ 4 files changed, 122 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index 95214484dca..a640a6c745c 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -891,6 +891,28 @@ class SquareDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker { } }; +// log Grad: dx = dout / x +// log Grad Grad: ddout = ddx / x; dx = -(dout / x) * (ddx / x) +template +class LogDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker { + public: + using ::paddle::framework::SingleGradOpMaker::SingleGradOpMaker; + + protected: + void Apply(GradOpPtr op) const override { + op->SetType("log_grad_grad"); + op->SetInput("X", this->Input("X")); + // X@GRAD@GRAD: ddx + op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X"))); + op->SetInput("DOut", this->Input(framework::GradVarName("Out"))); + op->SetAttrMap(this->Attrs()); + // X@GRAD: dx + 
op->SetOutput("DX", this->InputGrad("X")); + // Out@GRAD@GRAD: ddy + op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out"))); + } +}; + DECLARE_INPLACE_OP_INFERER(ActivationGradOpInplaceInferer, {framework::GradVarName("Out"), framework::GradVarName("X")}); @@ -1272,6 +1294,35 @@ REGISTER_OP_CPU_KERNEL( ops::AbsGradGradFunctor>); /* ========================================================================== */ +/* ========================== Log register ==================================*/ +REGISTER_OPERATOR( + log, ops::ActivationOp, ops::LogOpMaker, ops::ActivationOpInferVarType, + ops::ActivationGradOpMaker::FwdDeps(), + paddle::framework::OpDesc>, + ops::ActivationGradOpMaker::FwdDeps(), + paddle::imperative::OpBase>, + ops::ActFwdInplaceInferer); +REGISTER_OPERATOR(log_grad, ops::ActivationOpGrad, + ops::ActivationGradOpInplaceInferer, + ops::LogDoubleGradMaker, + ops::LogDoubleGradMaker); + +REGISTER_OPERATOR( + log_grad_grad, + ops::ActivationOpDoubleGrad::FwdDeps()>, + ops::ActivationDoubleGradOpInplaceInferer); + +REGISTER_ACTIVATION_CPU_KERNEL(log, Log, LogFunctor, LogGradFunctor); + +REGISTER_OP_CPU_KERNEL( + log_grad_grad, ops::LogDoubleGradKernel>, + ops::LogDoubleGradKernel>, + ops::LogDoubleGradKernel>); +/* ========================================================================== */ + /* ========================== register checkpoint ===========================*/ REGISTER_OP_VERSION(leaky_relu) .AddCheckpoint( diff --git a/paddle/fluid/operators/activation_op.cu b/paddle/fluid/operators/activation_op.cu index 072d952d261..839776ad58d 100644 --- a/paddle/fluid/operators/activation_op.cu +++ b/paddle/fluid/operators/activation_op.cu @@ -193,3 +193,15 @@ REGISTER_OP_CUDA_KERNEL( ops::ActivationDoubleGradKernel>); /* ========================================================================== */ + +/* ========================== Log register ==================================*/ +REGISTER_ACTIVATION_CUDA_KERNEL(log, Log, LogFunctor, 
LogGradFunctor); + +REGISTER_OP_CUDA_KERNEL( + log_grad_grad, ops::LogDoubleGradKernel>, + ops::LogDoubleGradKernel>, + ops::LogDoubleGradKernel>); +/* ========================================================================== */ diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h index 646f546bffb..a5c613297a4 100644 --- a/paddle/fluid/operators/activation_op.h +++ b/paddle/fluid/operators/activation_op.h @@ -1663,6 +1663,10 @@ class SquareDoubleGradKernel } }; +template +class LogDoubleGradKernel + : public SquareDoubleGradKernel {}; + template class ELUDoubleGradKernel : public framework::OpKernel { @@ -1852,6 +1856,37 @@ class PowGradKernel functor(*place, x, out, dout, dx); } }; + +template +struct LogGradGradFunctor : public BaseActivationFunctor { + template + void operator()(const Device& dev, const framework::Tensor* X, + const framework::Tensor* ddX, framework::Tensor* ddOut, + const framework::Tensor* dOut, framework::Tensor* dX) const { + auto* d = dev.eigen_device(); + auto ddx = framework::EigenVector::Flatten( + GET_DATA_SAFELY(ddX, "Input", "DDX", "LogGradGrad")); + auto x = framework::EigenVector::Flatten( + GET_DATA_SAFELY(X, "Input", "X", "LogGradGrad")); + // ddout = ddx / x; dx = -(dout / x) * (ddx / x) + // calculate dx first, so ddout can inplace ddx + if (dX) { + auto dout = framework::EigenVector::Flatten( + GET_DATA_SAFELY(dOut, "Output", "DOut", "LogGradGrad")); + auto dx = framework::EigenVector::Flatten( + GET_DATA_SAFELY(dX, "Output", "DX", "LogGradGrad")); + dx.device(*d) = dout * static_cast(-1) * ddx / (x * x); + } + if (ddOut) { + auto ddout = framework::EigenVector::Flatten( + GET_DATA_SAFELY(ddOut, "Output", "DDOut", "LogGradGrad")); + ddout.device(*d) = ddx * static_cast(1) / x; + } + } + + static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepX; } +}; + } // namespace operators } // namespace paddle @@ -1872,7 +1907,6 @@ class PowGradKernel __macro(cosh, Cosh, CoshFunctor, 
CoshGradFunctor); \ __macro(round, Round, RoundFunctor, ZeroGradFunctor); \ __macro(reciprocal, Reciprocal, ReciprocalFunctor, ReciprocalGradFunctor); \ - __macro(log, Log, LogFunctor, LogGradFunctor); \ __macro(log1p, Log1p, Log1pFunctor, Log1pGradFunctor); \ __macro(brelu, BRelu, BReluFunctor, BReluGradFunctor); \ __macro(soft_relu, SoftRelu, SoftReluFunctor, SoftReluGradFunctor); \ diff --git a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py index c97cca654a7..6c4834b84f9 100644 --- a/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_activation_nn_grad.py @@ -173,5 +173,29 @@ class TestAbsDoubleGradCheck(unittest.TestCase): self.func(p) +class TestLogDoubleGradCheck(unittest.TestCase): + @prog_scope() + def func(self, place): + shape = [2, 3, 7, 9] + eps = 1e-6 + dtype = np.float64 + + x = layers.data('x', shape, False, dtype) + x.persistable = True + y = layers.log(x) + + x_arr = np.random.uniform(0.1, 1, shape).astype(dtype) + + gradient_checker.double_grad_check( + [x], y, x_init=x_arr, place=place, eps=eps) + + def test_grad(self): + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + if __name__ == "__main__": unittest.main() -- GitLab From 6b727e08b1f38b3f4acc1708c163ed6ae5df8d58 Mon Sep 17 00:00:00 2001 From: QingshuChen Date: Sun, 27 Sep 2020 13:56:14 +0800 Subject: [PATCH 117/117] support elementwise add, activation, matmul on Baidu Kunlun (#27143) * support elementwise add, activation, matmul on Baidu Kunlun * test=kunlun * minor * test=kunlun * reconstuct the xpu directory * test=kunlun * minor * test=kunlun * minor * test=kunlun * minor * test=kunlun * minor * test=kunlun * minor * test=kunlun --- cmake/external/xpu.cmake | 2 +- cmake/operators.cmake | 8 +- .../allocation/naive_best_fit_allocator.cc | 9 +- 
paddle/fluid/operators/activation_op_xpu.cc | 179 +++++++++ .../elementwise/elementwise_add_op_xpu.cc | 162 ++++++++ .../operators/elementwise/elementwise_xpu.h | 113 ++++++ paddle/fluid/operators/matmul_op_xpu.cc | 343 +++++++++++++++++ .../{xpu/mul_xpu_op.cc => mul_op_xpu.cc} | 2 +- paddle/fluid/platform/init_test.cc | 1 + paddle/fluid/platform/xpu_header.h | 27 ++ python/paddle/__init__.py | 2 + python/paddle/device.py | 35 +- .../paddle/fluid/tests/unittests/op_test.py | 16 + .../fluid/tests/unittests/test_matmul_op.py | 1 + .../fluid/tests/unittests/test_mul_op.py | 54 +-- .../tests/unittests/xpu/test_activation_op.py | 215 +++++++++++ .../unittests/xpu/test_elementwise_add_op.py | 346 +++++++++++++++++ .../tests/unittests/xpu/test_matmul_op.py | 355 ++++++++++++++++++ .../fluid/tests/unittests/xpu/test_mul_op.py | 161 ++++++++ tools/wlist.json | 4 +- 20 files changed, 1970 insertions(+), 65 deletions(-) create mode 100644 paddle/fluid/operators/activation_op_xpu.cc create mode 100644 paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc create mode 100644 paddle/fluid/operators/elementwise/elementwise_xpu.h create mode 100644 paddle/fluid/operators/matmul_op_xpu.cc rename paddle/fluid/operators/{xpu/mul_xpu_op.cc => mul_op_xpu.cc} (100%) create mode 100755 python/paddle/fluid/tests/unittests/xpu/test_activation_op.py create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op.py create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_matmul_op.py create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_mul_op.py diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake index 8a927d8e282..07fe7d245ef 100644 --- a/cmake/external/xpu.cmake +++ b/cmake/external/xpu.cmake @@ -4,7 +4,7 @@ endif() INCLUDE(ExternalProject) SET(XPU_PROJECT "extern_xpu") -SET(XPU_URL "https://kunlun1.su.bcebos.com/xpu.tar.gz" CACHE STRING "" FORCE) +SET(XPU_URL "https://baidu-kunlun-public.su.bcebos.com/paddle_depence/xpu.tar.gz" 
CACHE STRING "" FORCE) SET(XPU_SOURCE_DIR "${THIRD_PARTY_PATH}/xpu") SET(XPU_DOWNLOAD_DIR "${XPU_SOURCE_DIR}/src/${XPU_PROJECT}") SET(XPU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/xpu") diff --git a/cmake/operators.cmake b/cmake/operators.cmake index 21080fbe8fd..7aa2766763c 100644 --- a/cmake/operators.cmake +++ b/cmake/operators.cmake @@ -62,9 +62,9 @@ function(op_library TARGET) endif() endif() if(WITH_XPU) - string(REPLACE "_op" "_xpu_op" XPU_FILE "${TARGET}") - if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/xpu/${XPU_FILE}.cc) - list(APPEND xpu_cc_srcs xpu/${XPU_FILE}.cc) + string(REPLACE "_op" "_op_xpu" XPU_FILE "${TARGET}") + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${XPU_FILE}.cc) + list(APPEND xpu_cc_srcs ${XPU_FILE}.cc) endif() endif() else() @@ -83,7 +83,7 @@ function(op_library TARGET) list(APPEND mkldnn_cc_srcs ${src}) elseif(${src} MATCHES ".*\\.cu.cc$") list(APPEND cu_cc_srcs ${src}) - elseif(WITH_XPU AND ${src} MATCHES ".*_xpu_op.cc$") + elseif(WITH_XPU AND ${src} MATCHES ".*_op_xpu.cc$") list(APPEND xpu_cc_srcs ${src}) elseif(${src} MATCHES ".*\\.cc$") list(APPEND cc_srcs ${src}) diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc index 92e3933a072..c661c9f9c37 100644 --- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc @@ -127,11 +127,10 @@ void *Alloc(const platform::XPUPlace &place, size_t size) { "Baidu Kunlun Card is properly installed.", ret)); ret = xpu_malloc(reinterpret_cast(&p), size); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); + PADDLE_ENFORCE_EQ( + ret, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], no enough memory", ret)); if (FLAGS_init_allocated_mem) { PADDLE_THROW(platform::errors::Unimplemented( "xpu memory 
FLAGS_init_allocated_mem is not implemented.")); diff --git a/paddle/fluid/operators/activation_op_xpu.cc b/paddle/fluid/operators/activation_op_xpu.cc new file mode 100644 index 00000000000..49b7a08a7b5 --- /dev/null +++ b/paddle/fluid/operators/activation_op_xpu.cc @@ -0,0 +1,179 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef PADDLE_WITH_XPU + +#include "paddle/fluid/operators/activation_op.h" +#include +#include "paddle/fluid/platform/xpu_header.h" + +namespace paddle { +namespace operators { + +using paddle::framework::Tensor; + +template +class XPUActivationKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &context) const override { + Functor functor; + + auto attrs = functor.GetAttrs(); + for (auto &attr : attrs) { + *attr.second = context.Attr(attr.first); + } + functor(context); + } +}; + +template +class XPUActivationGradKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &context) const override { + Functor functor; + + auto attrs = functor.GetAttrs(); + for (auto &attr : attrs) { + *attr.second = context.Attr(attr.first); + } + functor(context); + } +}; + +template +void xpu_activation_forward(const framework::ExecutionContext &ctx, + xpu::Activation_t type) { + const auto *x = ctx.Input("X"); + auto *y = ctx.Output("Out"); + const T *x_data = x->data(); + T *y_data = y->mutable_data(ctx.GetPlace()); + 
int r = 0; + if (xpu::Activation_t::ACT_POW == type.type) { + type.pow_factor = ctx.Attr("factor"); + } + auto xpu_context = ctx.device_context().x_context(); + r = xpu::activation_forward(xpu_context, type, x->numel(), + reinterpret_cast(x_data), + reinterpret_cast(y_data)); + PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + r)); +} + +template +void xpu_activation_backward(const framework::ExecutionContext &ctx, + xpu::Activation_t type) { + /* TODO: relu tanh sigmoid are inplace */ + const auto *x = ctx.Input("X"); + auto *y = ctx.Input("Out"); + auto *dOut = ctx.Input(framework::GradVarName("Out")); + auto *dX = ctx.Output(framework::GradVarName("X")); + const T *x_data = nullptr; + const T *y_data = nullptr; + const T *y_grad = nullptr; + if (x != nullptr) x_data = x->data(); + if (y != nullptr) y_data = y->data(); + if (dOut != nullptr) y_grad = dOut->data(); + T *x_grad = dX->mutable_data(ctx.GetPlace()); + auto xpu_context = ctx.device_context().x_context(); + int r = xpu::activation_backward(xpu_context, type, dX->numel(), + reinterpret_cast(x_data), + reinterpret_cast(y_data), + reinterpret_cast(y_grad), + reinterpret_cast(x_grad)); + PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + r)); +} + +template +struct XPUActivationFunc : public BaseActivationFunctor { + void operator()(const framework::ExecutionContext &ctx) const { + xpu_activation_forward(ctx, + algorithm); + } +}; + +template +struct XPUActivationGradFunc : public BaseActivationFunctor { + void operator()(const framework::ExecutionContext &ctx) const { + xpu_activation_backward(ctx, + algorithm); + } +}; + +template +using XPUReluFunctor = XPUActivationFunc; +template +using XPUSigmoidFunctor = XPUActivationFunc; +template +using XPUTanhFunctor = 
XPUActivationFunc; +template +using XPUGeluFunctor = XPUActivationFunc; +template +using XPULogFunctor = XPUActivationFunc; +template +using XPUSquareFunctor = XPUActivationFunc; +template +using XPUSuareGradFunctor = XPUActivationGradFunc; +template +using XPUReluGradFunctor = XPUActivationGradFunc; +template +using XPUSigmoidGradFunctor = + XPUActivationGradFunc; +template +using XPUTanhGradFunctor = XPUActivationGradFunc; +template +using XPUGeluGradFunctor = XPUActivationGradFunc; +template +using XPUSqrtFunctor = XPUActivationFunc; +template +using XPUSqrtGradFunctor = XPUActivationGradFunc; +template +using XPUACTPowFunctor = XPUActivationFunc; +template +using XPUABSFunctor = XPUActivationFunc; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +#define REGISTER_ACTIVATION_XPU_KERNEL(act_type, functor, grad_functor) \ + REGISTER_OP_XPU_KERNEL(act_type, \ + ops::XPUActivationKernel>); \ + REGISTER_OP_XPU_KERNEL( \ + act_type##_grad, \ + ops::XPUActivationGradKernel>); + +REGISTER_ACTIVATION_XPU_KERNEL(relu, XPUReluFunctor, XPUReluGradFunctor) +REGISTER_ACTIVATION_XPU_KERNEL(tanh, XPUTanhFunctor, XPUTanhGradFunctor) +REGISTER_ACTIVATION_XPU_KERNEL(sigmoid, XPUSigmoidFunctor, + XPUSigmoidGradFunctor) +REGISTER_ACTIVATION_XPU_KERNEL(gelu, XPUGeluFunctor, XPUGeluGradFunctor) +REGISTER_ACTIVATION_XPU_KERNEL(sqrt, XPUSqrtFunctor, XPUSqrtGradFunctor) +REGISTER_ACTIVATION_XPU_KERNEL(square, XPUSquareFunctor, XPUSuareGradFunctor) +REGISTER_OP_XPU_KERNEL(log, + ops::XPUActivationKernel>); +REGISTER_OP_XPU_KERNEL(pow, + ops::XPUActivationKernel>); +REGISTER_OP_XPU_KERNEL(abs, + ops::XPUActivationKernel>); + +#endif // PADDLE_WITH_XPU diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc b/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc new file mode 100644 index 00000000000..9ff7a71d7f0 --- /dev/null +++ b/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc @@ -0,0 +1,162 @@ +/* Copyright 
(c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef PADDLE_WITH_XPU +#include "paddle/fluid/operators/elementwise/elementwise_add_op.h" +#include +#include +#include "paddle/fluid/operators/elementwise/elementwise_op.h" + +#include "paddle/fluid/operators/elementwise/elementwise_xpu.h" + +namespace paddle { +namespace operators { + +template +class ElementwiseAddXPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + XPUElementwise>(ctx); + } +}; + +template +class ElementwiseAddGradXPUKernel : public ElemwiseGradKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + ElemwiseGradKernel::Compute(ctx); + using Tensor = framework::Tensor; + + auto *dout = ctx.Input(framework::GradVarName("Out")); + auto *dx = ctx.Output(framework::GradVarName("X")); + auto *dy = ctx.Output(framework::GradVarName("Y")); + + auto dx_dims = dout->dims(); + auto dy_dims_untrimed = dout->dims(); + T *dx_data = NULL; + T *dy_data = NULL; + + int axis = ctx.Attr("axis"); + PADDLE_ENFORCE_GE(dx_dims.size(), dy_dims_untrimed.size(), + "Rank of first input must >= rank of second input."); + + if (dx != nullptr) { + dx->mutable_data(ctx.GetPlace()); + dx_dims = dx->dims(); + dx_data = dx->data(); + } + + if (dy != nullptr) { + dy->mutable_data(ctx.GetPlace()); + dy_dims_untrimed = dy->dims(); + dy_data = dy->data(); + } + + int pre, n, post, 
is_common_broadcast; + if (dx_dims == dy_dims_untrimed) { + pre = post = 1; + n = dout->numel(); + } else { + axis = (axis == -1 ? dx_dims.size() - dy_dims_untrimed.size() : axis); + PADDLE_ENFORCE(axis >= 0 && axis < dx_dims.size(), + "Axis should be in range [0, dx_dims)"); + auto dy_dims = trim_trailing_singular_dims(dy_dims_untrimed); + axis = (dy_dims.size() == 0) ? dx_dims.size() : axis; + get_mid_dims(dx_dims, dy_dims, axis, &pre, &n, &post, + &is_common_broadcast); + } + int len = pre * n * post; + + auto &dev_ctx = + ctx.template device_context(); + if (post == 1) { + int r = xpu::matrix_vector_add_grad( + dev_ctx.x_context(), dout->data(), dout->data(), + dout->data(), dout->data(), dx_data, dy_data, pre, n); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + r)); + return; + } + + if (dx == nullptr) { + PADDLE_ENFORCE_EQ( + xpu_malloc(reinterpret_cast(&dx_data), len * sizeof(float)), + XPU_SUCCESS, platform::errors::External("XPU has no enough memory")); + } + + if (dy == nullptr) { + PADDLE_ENFORCE_EQ( + xpu_malloc(reinterpret_cast(&dy_data), len * sizeof(float)), + XPU_SUCCESS, platform::errors::External("XPU has no enough memory")); + } else { + if (len != n) { + PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast(&dy_data), + len * sizeof(float)), + XPU_SUCCESS, platform::errors::External( + "XPU has no enough memory")); + } + } + + int r = xpu::elementwise_add_grad( + dev_ctx.x_context(), dout->data() /*x*/, dout->data() /*y*/, + dout->data() /*out*/, dout->data(), dx_data, dy_data, len); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + r)); + + if ((dy != nullptr) && (len != n)) { + r = xpu::reduce_ew(dev_ctx.x_context(), dy_data, dy->data(), pre, n, + post, xpu::ElementwiseOp::ASSIGN); + PADDLE_ENFORCE_EQ( + 
r, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + r)); + dev_ctx.Wait(); + xpu_free(dy_data); + } + + if ((dx == nullptr || dy == nullptr) && !(dy != nullptr && len != n)) { + dev_ctx.Wait(); + } + + if (dx == nullptr) { + xpu_free(dx_data); + } + if (dy == nullptr) { + xpu_free(dy_data); + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP_XPU_KERNEL( + elementwise_add, + ops::ElementwiseAddXPUKernel); +REGISTER_OP_XPU_KERNEL(elementwise_add_grad, + ops::ElementwiseAddGradXPUKernel< + paddle::platform::XPUDeviceContext, float>); +#endif diff --git a/paddle/fluid/operators/elementwise/elementwise_xpu.h b/paddle/fluid/operators/elementwise/elementwise_xpu.h new file mode 100644 index 00000000000..53c4332e919 --- /dev/null +++ b/paddle/fluid/operators/elementwise/elementwise_xpu.h @@ -0,0 +1,113 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ +#pragma once +#ifdef PADDLE_WITH_XPU +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/platform/place.h" + +namespace paddle { +namespace operators { + +template +struct XPUAddFunctor { + int operator()(xpu::Context* ctx, const T* x, const T* y, T* z, int len) { + return xpu::elementwise_add(ctx, x, y, z, len); + } +}; + +template +struct XPUMulFunctor { + int operator()(xpu::Context* ctx, const T* x, const T* y, T* z, int len) { + return xpu::elementwise_mul(ctx, x, y, z, len); + } +}; + +template +void XPUElementwise(const framework::ExecutionContext& ctx) { + PADDLE_ENFORCE(platform::is_xpu_place(ctx.GetPlace()), + "This kernel only runs on XPU device."); + auto x_var = ctx.InputVar("X"); + PADDLE_ENFORCE_NE(x_var, nullptr, + platform::errors::Fatal("Cannot get input Variable X")); + PADDLE_ENFORCE(x_var->IsType(), + "XPU only support LoDTensor"); + + auto x = x_var->Get(); + auto* y = ctx.Input("Y"); + auto* z = ctx.Output("Out"); + z->mutable_data(ctx.GetPlace()); + + int axis = ctx.Attr("axis"); + auto x_dims = x.dims(); + auto y_dims_untrimed = y->dims(); + PADDLE_ENFORCE_GE(x_dims.size(), y_dims_untrimed.size(), + "Rank of first input must >= rank of second input."); + axis = (axis == -1 ? x_dims.size() - y_dims_untrimed.size() : axis); + PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(), + "Axis should be in range [0, x_dims)"); + auto y_dims = trim_trailing_singular_dims(y_dims_untrimed); + axis = (y_dims.size() == 0) ? 
x_dims.size() : axis; + int pre, n, post, is_common_broadcast; + get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post, &is_common_broadcast); + int len = pre * n * post; + + const T* x_data = x.data(); + const T* y_data = y->data(); + T* z_data = z->data(); + T* y_broadcast = nullptr; + + auto& dev_ctx = + ctx.template device_context(); + + if (post == 1) { + if (std::is_same>::value) { + int res = xpu::matrix_vector_add(dev_ctx.x_context(), x_data, y_data, + z_data, pre, n); + PADDLE_ENFORCE(res == xpu::Error_t::SUCCESS, "XPU kernel error! res = %d", + res); + return; + } + if (std::is_same>::value) { + int res = xpu::matrix_vector_mul(dev_ctx.x_context(), x_data, y_data, + z_data, pre, n); + PADDLE_ENFORCE(res == xpu::Error_t::SUCCESS, "XPU kernel error! res = %d", + res); + return; + } + } + + if (pre != 1 || post != 1) { + PADDLE_ENFORCE(xpu_malloc(reinterpret_cast(&y_broadcast), + len * sizeof(T)) == XPU_SUCCESS); + int res = xpu::broadcast_ew(dev_ctx.x_context(), y_data, y_broadcast, pre, + n, post, xpu::ElementwiseOp::ASSIGN); + PADDLE_ENFORCE(res == xpu::Error_t::SUCCESS, "XPU kernel error! res = %d", + res); + y_data = y_broadcast; + } + + Functor functor; + int res = functor(dev_ctx.x_context(), x_data, y_data, z_data, len); + PADDLE_ENFORCE(res == xpu::Error_t::SUCCESS, "XPU kernel error! res = %d", + res); + + if (pre != 1 || post != 1) { + dev_ctx.Wait(); + xpu_free(y_broadcast); + } +} + +} // namespace operators +} // namespace paddle +#endif diff --git a/paddle/fluid/operators/matmul_op_xpu.cc b/paddle/fluid/operators/matmul_op_xpu.cc new file mode 100644 index 00000000000..ff038d7ef12 --- /dev/null +++ b/paddle/fluid/operators/matmul_op_xpu.cc @@ -0,0 +1,343 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef PADDLE_WITH_XPU + +#include +#include +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/blas.h" + +namespace paddle { +namespace operators { + +static framework::DDim RowMatrixFromVector(const framework::DDim &x_dim) { + if (x_dim.size() > 1) { + return x_dim; + } + return framework::make_ddim({1, x_dim[0]}); +} + +static framework::Tensor FoldInitDims(const framework::Tensor &input) { + auto output = input; + auto in_dims = input.dims(); + if (in_dims.size() == 3) { + output.Resize({in_dims[0] * in_dims[1], in_dims[2]}); + } + return output; +} +/** + * Get column matrix shape from a vector shape. If the rank of y_dim > 1, the + * original y_dim is returned. + */ +static framework::DDim ColumnMatrixFromVector(const framework::DDim &y_dim) { + if (y_dim.size() > 1) { + return y_dim; + } + return framework::make_ddim({y_dim[0], 1}); +} + +static void ReshapeTensorIntoMatrixSequence( + framework::Tensor *x, const math::MatDescriptor &descriptor) { + int64_t h, w; + h = descriptor.height_; + w = descriptor.width_; + if (descriptor.trans_) { + std::swap(w, h); + } + if (descriptor.batch_size_) { + x->Resize({descriptor.batch_size_, h, w}); + } else { + x->Resize({h, w}); + } +} +/** + * Reshape the x,y,out tensor to 3-D or 2-D tensor by matrix descriptor + * Out = matmul(x, y) + * + * This method will first calculate X,Y matrix sequence, and then calculate + * the out shape.
+ * + * Assume X = [BatchSize, H1, W1], Y = [BatchSize, H2, W2] + * The out = [BatchSize, H1, W2] + * + * If there is no batch size in `X` and `Y`, the out will be [H1, W2] + * If any of `X` and `Y` has batch size BatchSize, the out will have the + * BatchSize. + */ +static void ReshapeXYOutIntoMatrixSequence(framework::Tensor *x, + framework::Tensor *y, + framework::Tensor *out, bool trans_x, + bool trans_y) { + auto x_dim = RowMatrixFromVector(x->dims()); + auto y_dim = ColumnMatrixFromVector(y->dims()); + auto mat_dim_x = math::CreateMatrixDescriptor(x_dim, 0, trans_x); + auto mat_dim_y = math::CreateMatrixDescriptor(y_dim, 0, trans_y); + if (mat_dim_x.batch_size_ == 0 && mat_dim_y.batch_size_ == 0) { + out->Resize({mat_dim_x.height_, mat_dim_y.width_}); + } else { + out->Resize({std::max(mat_dim_x.batch_size_, mat_dim_y.batch_size_), + mat_dim_x.height_, mat_dim_y.width_}); + } + + ReshapeTensorIntoMatrixSequence(x, mat_dim_x); + ReshapeTensorIntoMatrixSequence(y, mat_dim_y); +} + +template +class MatMulXPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &context) const override { + auto *x = context.Input("X"); + auto *y = context.Input("Y"); + auto *out = context.Output("Out"); + out->mutable_data(context.GetPlace()); + + auto mat_dim_a = math::CreateMatrixDescriptor( + RowMatrixFromVector(x->dims()), 0, context.Attr("transpose_X")); + auto mat_dim_b = + math::CreateMatrixDescriptor(ColumnMatrixFromVector(y->dims()), 0, + context.Attr("transpose_Y")); + PADDLE_ENFORCE_EQ( + mat_dim_a.width_, mat_dim_b.height_, + platform::errors::InvalidArgument("Shape mistake in matmul_op")); + PADDLE_ENFORCE_EQ( + mat_dim_a.batch_size_, mat_dim_b.batch_size_, + platform::errors::InvalidArgument("Shape mistake in matmul_op")); + T alpha = static_cast(context.Attr("alpha")); + + auto &dev_ctx = context.template device_context(); + float *data_c = out->data(); + if (mat_dim_a.batch_size_ == 0 || mat_dim_a.batch_size_ == 1) { + 
int r = + xpu::fc_int16(dev_ctx.x_context(), mat_dim_a.trans_, mat_dim_b.trans_, + mat_dim_a.height_, mat_dim_b.width_, mat_dim_a.width_, + alpha, x->data(), y->data(), 0.0f, data_c); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + r)); + } else { + // batch matmul + int r = xpu::batched_gemm_int16(dev_ctx.x_context(), mat_dim_a.trans_, + mat_dim_b.trans_, mat_dim_a.batch_size_, + mat_dim_a.height_, mat_dim_b.width_, + mat_dim_a.width_, alpha, x->data(), + y->data(), data_c, nullptr, nullptr); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + r)); + } + } +}; + +// Reshape a rank-3 tensor from P x M x N to M x (P * N). +// (Warning: This requires transposing data and writes into new memory.) +// Identity op if the tensor is not of rank 3. +template +static framework::Tensor XPUFoldHeadAndLastDims( + const DeviceContext &context, const framework::Tensor &input) { + auto in_dims = input.dims(); + if (in_dims.size() != 3) { + return input; + } + + framework::Tensor output; + output.Resize({in_dims[1], in_dims[0], in_dims[2]}); + output.mutable_data(context.GetPlace()); + std::vector in_shape_host = {static_cast(in_dims[0]), + static_cast(in_dims[1]), + static_cast(in_dims[2])}; + std::vector axis_host = {1, 0, 2}; + + int r = xpu::transpose(context.x_context(), input.data(), output.data(), + in_shape_host.data(), axis_host.data(), /*ndims=*/3); + PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + r)); + output.Resize({in_dims[1], in_dims[0] * in_dims[2]}); + + return output; +} + +// Using dimensional constraints on matrix multiplication, it is +// straight-forward to check the following table for when X 
and Y +// are both matrices. +// +// transpose_X | False | True | False | True +// transpose_Y | False | False | True | True +// -----------+----------+----------+----------+----------- +// dX = | dOut Y^T | Y dOut^T | dOut Y | Y^T dOut^T +// dY = | X^T dOut | X dOut | dOut^T X | dOut^T X^T +// +// When X is a vector of size K, we treat it instead as a matrix of shape +// (1, K). Similarly, when Y is a vector of size K, we treat it instead as +// a matrix of shape (K, 1). +// +// When X and Y are both 3-dimensional tensors, then the first dimension +// (the batch dimension) can be ignored and the exact same formulas apply +// as for two matrices. +// +// Finally, when, e.g., X is a 3-dimensional tensor but Y is a matrix, we end +// up with formulas like +// +// dY_{ij} = \sum_{p, m} X_{pmi} dOut_{pmj} +// +// To handle this sort of scenario, we reshape X : P x M x K, dOut: P x M x N +// to X: (P * M) x K, dOut: (P * M) x N. +template +class MatMulGradXPUKernel : public framework::OpKernel { + public: + void MatMul(const framework::ExecutionContext &context, + const framework::Tensor &a, bool trans_a, + const framework::Tensor &b, bool trans_b, + framework::Tensor *out) const { + out->mutable_data(context.GetPlace()); + auto mat_dim_a = math::CreateMatrixDescriptor(a.dims(), 0, trans_a); + auto mat_dim_b = math::CreateMatrixDescriptor(b.dims(), 0, trans_b); + PADDLE_ENFORCE_EQ( + mat_dim_a.width_, mat_dim_b.height_, + platform::errors::InvalidArgument("Shape mistake in matmul_grad_op")); + PADDLE_ENFORCE_EQ( + mat_dim_a.batch_size_, mat_dim_b.batch_size_, + platform::errors::InvalidArgument("Shape mistake in matmul_grad_op")); + T alpha = static_cast(context.Attr("alpha")); + + auto &dev_ctx = context.template device_context(); + float *data_c = out->data(); + if (mat_dim_a.batch_size_ == 0 || mat_dim_a.batch_size_ == 1) { + int r = + xpu::fc_int16(dev_ctx.x_context(), mat_dim_a.trans_, mat_dim_b.trans_, + mat_dim_a.height_, mat_dim_b.width_, mat_dim_a.width_, +
alpha, a.data(), b.data(), 0.0f, data_c); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + r)); + } else { + // batch matmul + int r = xpu::batched_gemm_int16(dev_ctx.x_context(), mat_dim_a.trans_, + mat_dim_b.trans_, mat_dim_a.batch_size_, + mat_dim_a.height_, mat_dim_b.width_, + mat_dim_a.width_, alpha, a.data(), + b.data(), data_c, nullptr, nullptr); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External( + "XPU API return wrong value[%d], please check whether " + "Baidu Kunlun Card is properly installed.", + r)); + } + } + + void CalcInputGrad(const framework::ExecutionContext &context, + const framework::Tensor &a, bool trans_a, + bool is_fold_init_dims_a, const framework::Tensor &b, + bool trans_b, bool is_fold_init_dims_b, + framework::Tensor *out) const { + if (out == nullptr) return; + bool need_combine = (a.dims().size() == 3 || b.dims().size() == 3) && + out->dims().size() == 2; + if (!need_combine) { + MatMul(context, a, trans_a, b, trans_b, out); + } else { + auto &dev_ctx = context.template device_context(); + MatMul( + context, is_fold_init_dims_a + ? FoldInitDims(a) + : XPUFoldHeadAndLastDims(dev_ctx, a), + trans_a, is_fold_init_dims_b + ? 
FoldInitDims(b) + : XPUFoldHeadAndLastDims(dev_ctx, b), + trans_b, out); + } + } + + void Compute(const framework::ExecutionContext &context) const override { + auto x = *context.Input("X"); + auto y = *context.Input("Y"); + auto dout = + *context.Input(framework::GradVarName("Out")); + auto *dx = context.Output(framework::GradVarName("X")); + auto *dy = context.Output(framework::GradVarName("Y")); + bool transpose_x = context.Attr("transpose_X"); + bool transpose_y = context.Attr("transpose_Y"); + + ReshapeXYOutIntoMatrixSequence(&x, &y, &dout, transpose_x, transpose_y); + + framework::DDim dx_dims; + if (dx) { + dx_dims = dx->dims(); + if (dx_dims != x.dims()) { + dx->Resize(x.dims()); + } + } + + framework::DDim dy_dims; + if (dy) { + dy_dims = dy->dims(); + if (dy_dims != y.dims()) { + dy->Resize(y.dims()); + } + } + + if (transpose_x && transpose_y) { + CalcInputGrad(context, y, true, true, dout, true, false, dx); + CalcInputGrad(context, dout, true, true, x, true, false, dy); + } else if (transpose_x) { + CalcInputGrad(context, y, false, false, dout, true, false, dx); + CalcInputGrad(context, x, false, false, dout, false, true, dy); + } else if (transpose_y) { + CalcInputGrad(context, dout, false, false, y, false, true, dx); + CalcInputGrad(context, dout, true, true, x, false, true, dy); + } else { + CalcInputGrad(context, dout, false, false, y, true, false, dx); + CalcInputGrad(context, x, true, true, dout, false, true, dy); + } + + if (dx) { + if (dx_dims != x.dims()) { + dx->Resize(dx_dims); + } + } + + if (dy) { + if (dy_dims != y.dims()) { + dy->Resize(dy_dims); + } + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP_XPU_KERNEL( + matmul, ops::MatMulXPUKernel); +REGISTER_OP_XPU_KERNEL( + matmul_grad, + ops::MatMulGradXPUKernel); +#endif diff --git a/paddle/fluid/operators/xpu/mul_xpu_op.cc b/paddle/fluid/operators/mul_op_xpu.cc similarity index 100% rename from 
paddle/fluid/operators/xpu/mul_xpu_op.cc rename to paddle/fluid/operators/mul_op_xpu.cc index 79aae71c304..0c8469101ab 100644 --- a/paddle/fluid/operators/xpu/mul_xpu_op.cc +++ b/paddle/fluid/operators/mul_op_xpu.cc @@ -14,11 +14,11 @@ limitations under the License. */ #ifdef PADDLE_WITH_XPU +#include "paddle/fluid/operators/mul_op.h" #include #include #include #include -#include "paddle/fluid/operators/mul_op.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/platform/init_test.cc b/paddle/fluid/platform/init_test.cc index f14fbdd74f9..f1832206a1a 100644 --- a/paddle/fluid/platform/init_test.cc +++ b/paddle/fluid/platform/init_test.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/init.h" +#include "paddle/fluid/platform/xpu_info.h" TEST(InitDevices, CPU) { using paddle::framework::InitDevices; diff --git a/paddle/fluid/platform/xpu_header.h b/paddle/fluid/platform/xpu_header.h index d8c5f85f9cf..95e4979951d 100644 --- a/paddle/fluid/platform/xpu_header.h +++ b/paddle/fluid/platform/xpu_header.h @@ -15,9 +15,36 @@ #pragma once #ifdef PADDLE_WITH_XPU +#include +#include + +#include "paddle/fluid/platform/errors.h" #include "xpu/api.h" #include "xpu/runtime.h" #include "xpu/runtime_ex.h" namespace xpu = baidu::xpu::api; + +class XPUActHelper { + public: + // Convert string to activation type in xpu + static xpu::Activation_t ConvertToXpuActType( + const std::string& act_type_str) { + static std::unordered_map str2act = { + {"linear", xpu::Activation_t::LINEAR}, + {"relu", xpu::Activation_t::RELU}, + {"sigmoid", xpu::Activation_t::SIGMOID}, + {"tanh", xpu::Activation_t::TANH}, + {"gelu", xpu::Activation_t::GELU}, + {"leaky_relu", xpu::Activation_t::LEAKY_RELU}, + {"sqrt", xpu::Activation_t::SQRT}, + {"square", xpu::Activation_t::SQUARE}}; + + auto res = str2act.find(act_type_str); + PADDLE_ENFORCE_NE(res, str2act.end(), + 
paddle::platform::errors::InvalidArgument( + "Invalid activation type(%s) in XPU", act_type_str)); + return res->second; + } +}; #endif diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 40275a2ce71..e707de8e068 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -257,6 +257,8 @@ from .tensor.stat import numel #DEFINE_ALIAS from .device import get_cudnn_version from .device import set_device from .device import get_device +from .device import is_compiled_with_xpu +from .device import XPUPlace # from .tensor.tensor import Tensor #DEFINE_ALIAS # from .tensor.tensor import LoDTensor #DEFINE_ALIAS # from .tensor.tensor import LoDTensorArray #DEFINE_ALIAS diff --git a/python/paddle/device.py b/python/paddle/device.py index de24fd87513..46d0ff7bedc 100644 --- a/python/paddle/device.py +++ b/python/paddle/device.py @@ -22,7 +22,9 @@ from paddle.fluid.dygraph.parallel import ParallelEnv __all__ = [ 'get_cudnn_version', 'set_device', - 'get_device' + 'get_device', + 'XPUPlace', + 'is_compiled_with_xpu' # 'cpu_places', # 'CPUPlace', # 'cuda_pinned_places', @@ -35,6 +37,37 @@ __all__ = [ _cudnn_version = None +def is_compiled_with_xpu(): + """ + Whether paddle was built with WITH_XPU=ON to support Baidu Kunlun + + Returns (bool): whether paddle was built with WITH_XPU=ON + + Examples: + .. code-block:: python + + import paddle + support_xpu = paddle.device.is_compiled_with_xpu() + """ + return core.is_compiled_with_xpu() + + +def XPUPlace(dev_id): + """ + Return a Baidu Kunlun Place + + Parameters: + dev_id(int): Baidu Kunlun device id + + Examples: + .. code-block:: python + + import paddle + place = paddle.device.XPUPlace(0) + """ + return core.XPUPlace(dev_id) + + def get_cudnn_version(): """ This funciton return the version of cudnn. 
the retuen value is int which represents the diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index d02fdafe995..96efc36ed0a 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -217,6 +217,9 @@ class OpTest(unittest.TestCase): return False return True + def is_xpu_op_test(): + return hasattr(cls, "use_xpu") and cls.use_xpu == True + def is_mkldnn_op_test(): return hasattr(cls, "use_mkldnn") and cls.use_mkldnn == True @@ -239,6 +242,7 @@ class OpTest(unittest.TestCase): if cls.dtype in [np.float32, np.float64] \ and cls.op_type not in op_accuracy_white_list.NO_FP64_CHECK_GRAD_OP_LIST \ and not hasattr(cls, 'exist_fp64_check_grad') \ + and not is_xpu_op_test() \ and not is_mkldnn_op_test(): raise AssertionError( "This test of %s op needs check_grad with fp64 precision." % @@ -336,6 +340,11 @@ class OpTest(unittest.TestCase): self.attrs["use_mkldnn"] == True): self.__class__.use_mkldnn = True + if (hasattr(self, "use_xpu") and self.use_xpu == True) or \ + (hasattr(self, "attrs") and "use_xpu" in self.attrs and \ + self.attrs["use_xpu"] == True): + self.__class__.use_xpu = True + op_proto = OpProtoHolder.instance().get_op_proto(self.op_type) "infer datatype from inputs and outputs for this test case" self.infer_dtype_from_inputs_outputs(self.inputs, self.outputs) @@ -932,6 +941,8 @@ class OpTest(unittest.TestCase): need_run_ops = self._get_need_run_ops(op_desc) res = {} + if hasattr(self, 'attrs') and bool(self.attrs.get('use_xpu', False)): + return for op_desc, father_op_desc in reversed(need_run_ops): # The first one is the forward op has_infer_inplace = fluid.core.has_infer_inplace(op_desc.type()) @@ -1203,6 +1214,11 @@ class OpTest(unittest.TestCase): self.attrs["use_mkldnn"] == True): self.__class__.use_mkldnn = True + if (hasattr(self, "use_xpu") and self.use_xpu == True) or \ + (hasattr(self, "attrs") and "use_xpu" in self.attrs and \ + 
self.attrs["use_xpu"] == True): + self.__class__.use_xpu = True + places = self._get_places() for place in places: res = self.check_output_with_place(place, atol, no_check_set, diff --git a/python/paddle/fluid/tests/unittests/test_matmul_op.py b/python/paddle/fluid/tests/unittests/test_matmul_op.py index 3eb822bfed8..2d5f098a7fe 100644 --- a/python/paddle/fluid/tests/unittests/test_matmul_op.py +++ b/python/paddle/fluid/tests/unittests/test_matmul_op.py @@ -14,6 +14,7 @@ from __future__ import print_function +import paddle.fluid.core as core import unittest import numpy as np from op_test import OpTest diff --git a/python/paddle/fluid/tests/unittests/test_mul_op.py b/python/paddle/fluid/tests/unittests/test_mul_op.py index 5f223de1954..927383c1223 100644 --- a/python/paddle/fluid/tests/unittests/test_mul_op.py +++ b/python/paddle/fluid/tests/unittests/test_mul_op.py @@ -18,6 +18,8 @@ import unittest import numpy as np import paddle import paddle.fluid.core as core +import sys +sys.path.append("..") from op_test import OpTest import paddle.fluid as fluid from paddle.fluid import Program, program_guard @@ -175,57 +177,5 @@ class TestFP16MulOp2(TestMulOp2): no_grad_set=set('Y')) -@unittest.skipIf(not core.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestXPUMulOp1(TestMulOp): - def init_dtype_type(self): - self.dtype = np.float32 - - def test_check_output(self): - place = core.XPUPlace(0) - self.check_output_with_place(place, atol=1e-1) - - def test_check_grad_normal(self): - place = core.XPUPlace(0) - self.check_grad_with_place( - place, ['X', 'Y'], 'Out', max_relative_error=0.5) - - def test_check_grad_ingore_x(self): - place = core.XPUPlace(0) - self.check_grad_with_place( - place, ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X")) - - def test_check_grad_ingore_y(self): - place = core.XPUPlace(0) - self.check_grad_with_place( - place, ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y')) - - -@unittest.skipIf(not 
core.is_compiled_with_xpu(), - "core is not compiled with XPU") -class TestXPUMulOp2(TestMulOp2): - def init_dtype_type(self): - self.dtype = np.float32 - - def test_check_output(self): - place = core.XPUPlace(0) - self.check_output_with_place(place, atol=2e-1) - - def test_check_grad_normal(self): - place = core.XPUPlace(0) - self.check_grad_with_place( - place, ['X', 'Y'], 'Out', max_relative_error=0.9) - - def test_check_grad_ingore_x(self): - place = core.XPUPlace(0) - self.check_grad_with_place( - place, ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X")) - - def test_check_grad_ingore_y(self): - place = core.XPUPlace(0) - self.check_grad_with_place( - place, ['X'], 'Out', max_relative_error=0.9, no_grad_set=set('Y')) - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_activation_op.py b/python/paddle/fluid/tests/unittests/xpu/test_activation_op.py new file mode 100755 index 00000000000..788c110a592 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/xpu/test_activation_op.py @@ -0,0 +1,215 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import sys +sys.path.append("..") +import unittest +import numpy as np +import paddle.fluid.core as core +from op_test import OpTest +from scipy.special import expit, erf +import paddle +import paddle.fluid as fluid +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.fluid import compiler, Program, program_guard + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestXPUActivation(OpTest): + def setUp(self): + self.op_type = "exp" + self.init_dtype() + self.init_kernel_type() + + x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + out = np.exp(x) + + self.attrs = {'use_xpu': True} + self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + self.outputs = {'Out': out} + + def init_dtype(self): + self.dtype = np.float32 + + def test_check_output(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_output_with_place(place, atol=1e-3) + + def init_kernel_type(self): + pass + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestXPUSigmoid(TestXPUActivation): + def setUp(self): + self.op_type = "sigmoid" + self.init_dtype() + + x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) + out = 1 / (1 + np.exp(-x)) + + self.attrs = {'use_xpu': True} + self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + self.outputs = {'Out': out} + + def test_check_grad(self): + if paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['X'], 'Out', max_relative_error=0.01) + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestXPUTanh(TestXPUActivation): + def setUp(self): + self.op_type = "tanh" + self.init_dtype() + x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + out = np.tanh(x) + + self.attrs = {'use_xpu': True} + self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + self.outputs 
= {'Out': out} + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestXPUSqrt(TestXPUActivation): + def setUp(self): + self.op_type = "sqrt" + self.init_dtype() + + x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + out = np.sqrt(x) + + self.attrs = {'use_xpu': True} + self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + self.outputs = {'Out': out} + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestXPUAbs(TestXPUActivation): + def setUp(self): + self.op_type = "abs" + self.init_dtype() + + x = np.random.uniform(-1, 1, [4, 25]).astype(self.dtype) + # Because we set delta = 0.005 in calculating numeric gradient, + # if x is too small, such as 0.002, x_neg will be -0.003 + # x_pos will be 0.007, so the numeric gradient is inaccurate. + # we should avoid this + x[np.abs(x) < 0.005] = 0.02 + out = np.abs(x) + + self.attrs = {'use_xpu': True} + self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + self.outputs = {'Out': out} + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestXPURelu(TestXPUActivation): + def setUp(self): + self.op_type = "relu" + self.init_dtype() + + x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) + # The same reason with TestAbs + x[np.abs(x) < 0.005] = 0.02 + out = np.maximum(x, 0) + + self.attrs = {'use_xpu': True} + self.inputs = {'X': x} + self.outputs = {'Out': out} + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestXPUGelu(TestXPUActivation): + def setUp(self): + self.op_type = "gelu" + self.init_dtype() + approximate = False + x = np.random.uniform(-1, 1, [11, 17]).astype(self.dtype) + out = gelu(x, approximate) + + self.inputs = {'X': x} + self.outputs = {'Out': out} + self.attrs = {"approximate": approximate, 'use_xpu': True} + + +def gelu(x, approximate): + if approximate: + y_ref = 0.5 * x * (1.0 + np.tanh( + 
np.sqrt(2 / np.pi) * (x + 0.044715 * np.power(x, 3)))) + else: + y_ref = 0.5 * x * (1 + erf(x / np.sqrt(2))) + return y_ref.astype(x.dtype) + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestXPULog(TestXPUActivation): + def setUp(self): + self.op_type = "log" + self.init_dtype() + + x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + out = np.log(x) + + self.attrs = {'use_xpu': True} + self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + self.outputs = {'Out': out} + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestXPUSquare(TestXPUActivation): + def setUp(self): + self.op_type = "square" + self.init_dtype() + + x = np.random.uniform(0.1, 1, [11, 17]).astype(self.dtype) + out = np.square(x) + + self.attrs = {'use_xpu': True} + self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + self.outputs = {'Out': out} + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestXPUPow(TestXPUActivation): + def setUp(self): + self.op_type = "pow" + self.init_dtype() + + x = np.random.uniform(1, 2, [11, 17]).astype(self.dtype) + out = np.power(x, 3) + + self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)} + self.attrs = {'factor': 3.0, 'use_xpu': True} + self.outputs = {'Out': out} + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op.py b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op.py new file mode 100644 index 00000000000..9c6e7d21c1a --- /dev/null +++ b/python/paddle/fluid/tests/unittests/xpu/test_elementwise_add_op.py @@ -0,0 +1,346 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import sys +sys.path.append("..") +import unittest +import numpy as np +import paddle +import paddle.fluid.core as core +from op_test import OpTest, skip_check_grad_ci +import paddle.fluid as fluid +from paddle.fluid import compiler, Program, program_guard + + +class TestElementwiseAddOp(OpTest): + def init_kernel_type(self): + self.use_mkldnn = False + + def setUp(self): + self.op_type = "elementwise_add" + self.init_dtype() + self.init_input_output() + self.init_kernel_type() + self.init_axis() + + self.inputs = { + 'X': OpTest.np_dtype_to_fluid_dtype(self.x), + 'Y': OpTest.np_dtype_to_fluid_dtype(self.y) + } + self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn} + self.outputs = {'Out': self.out} + + def test_check_output(self): + # TODO(wangzhongpu): support mkldnn op in dygraph mode + self.check_output(check_dygraph=(self.use_mkldnn == False)) + + def test_check_grad_normal(self): + # TODO(wangzhongpu): support mkldnn op in dygraph mode + if self.dtype == np.float16: + return + self.check_grad( + ['X', 'Y'], 'Out', check_dygraph=(self.use_mkldnn == False)) + + def test_check_grad_ingore_x(self): + # TODO(wangzhongpu): support mkldnn op in dygraph mode + if self.dtype == np.float16: + return + self.check_grad( + ['Y'], + 'Out', + no_grad_set=set("X"), + check_dygraph=(self.use_mkldnn == False)) + + def test_check_grad_ingore_y(self): + # TODO(wangzhongpu): support mkldnn op in dygraph mode + if self.dtype == np.float16: + return + self.check_grad( + ['X'], + 'Out', + no_grad_set=set('Y'), + 
check_dygraph=(self.use_mkldnn == False)) + + def init_input_output(self): + self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) + self.y = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) + self.out = np.add(self.x, self.y) + + def init_dtype(self): + self.dtype = np.float64 + + def init_axis(self): + self.axis = -1 + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestXPUElementwiseAddOp(OpTest): + def setUp(self): + self.op_type = "elementwise_add" + self.init_dtype() + self.init_input_output() + self.init_axis() + + self.inputs = {'X': self.x, 'Y': self.y} + self.attrs = {'axis': self.axis, 'use_mkldnn': False, 'use_xpu': True} + self.outputs = {'Out': self.out} + + def test_check_output(self): + if self.dtype == np.float32 and paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_output_with_place(place) + + def test_check_grad_normal(self): + if self.dtype == np.float32 and paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place(place, ['X', 'Y'], 'Out') + + def test_check_grad_ingore_x(self): + if self.dtype == np.float32 and paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place(place, ['Y'], 'Out') + + def test_check_grad_ingore_y(self): + if self.dtype == np.float32 and paddle.is_compiled_with_xpu(): + place = paddle.XPUPlace(0) + self.check_grad_with_place(place, ['X'], 'Out') + + def init_input_output(self): + self.x = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) + self.y = np.random.uniform(0.1, 1, [13, 17]).astype(self.dtype) + self.out = np.add(self.x, self.y) + + def init_dtype(self): + self.dtype = np.float32 + + def init_axis(self): + self.axis = -1 + + +@skip_check_grad_ci( + reason="[skip shape check] Use y_shape(1) to test broadcast.") +class TestElementwiseAddOp_scalar(TestElementwiseAddOp): + def init_input_output(self): + self.x = np.random.rand(2, 3, 4).astype(self.dtype) + self.y 
= np.random.rand(1).astype(self.dtype) + self.out = self.x + self.y + + +@skip_check_grad_ci( + reason="[skip shape check] Use y_shape(1,1) to test broadcast.") +class TestElementwiseAddOp_scalar2(TestElementwiseAddOp): + def init_input_output(self): + self.x = np.random.rand(2, 3, 4).astype(self.dtype) + self.y = np.random.rand(1, 1).astype(self.dtype) + self.out = self.x + self.y + + +class TestElementwiseAddOp_Vector(TestElementwiseAddOp): + def init_input_output(self): + self.x = np.random.random((100, )).astype(self.dtype) + self.y = np.random.random((100, )).astype(self.dtype) + self.out = np.add(self.x, self.y) + + +class TestElementwiseAddOp_broadcast_0(TestElementwiseAddOp): + def init_input_output(self): + self.x = np.random.rand(100, 2, 3).astype(self.dtype) + self.y = np.random.rand(100).astype(self.dtype) + self.out = self.x + self.y.reshape(100, 1, 1) + + def init_axis(self): + self.axis = 0 + + +class TestElementwiseAddOp_broadcast_1(TestElementwiseAddOp): + def init_input_output(self): + self.x = np.random.rand(2, 100, 3).astype(self.dtype) + self.y = np.random.rand(100).astype(self.dtype) + self.out = self.x + self.y.reshape(1, 100, 1) + + def init_axis(self): + self.axis = 1 + + +class TestElementwiseAddOp_broadcast_2(TestElementwiseAddOp): + def init_input_output(self): + self.x = np.random.rand(2, 3, 100).astype(self.dtype) + self.y = np.random.rand(100).astype(self.dtype) + self.out = self.x + self.y.reshape(1, 1, 100) + + +class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp): + def init_input_output(self): + self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype) + self.y = np.random.rand(10, 12).astype(self.dtype) + self.out = self.x + self.y.reshape(1, 10, 12, 1) + + def init_axis(self): + self.axis = 1 + + +class TestElementwiseAddOp_broadcast_4(TestElementwiseAddOp): + def init_input_output(self): + self.x = np.random.rand(100, 2, 3, 4).astype(self.dtype) + self.y = np.random.rand(100, 1).astype(self.dtype) + self.out = self.x + 
self.y.reshape(100, 1, 1, 1) + + def init_axis(self): + self.axis = 0 + + +class TestElementwiseAddOp_broadcast_5(TestElementwiseAddOp): + def init_input_output(self): + self.x = np.random.rand(10, 3, 12).astype(self.dtype) + self.y = np.random.rand(10, 1, 12).astype(self.dtype) + self.out = self.x + self.y + + +class TestElementwiseAddOp_broadcast_6(TestElementwiseAddOp): + def init_input_output(self): + self.x = np.random.rand(2, 12, 3, 5).astype(self.dtype) + self.y = np.random.rand(2, 12, 1, 5).astype(self.dtype) + self.out = self.x + self.y + + +class TestElementwiseAddOp_broadcast_7(TestElementwiseAddOp): + def init_input_output(self): + self.x = np.random.rand(1, 1, 20, 5).astype(self.dtype) + self.y = np.random.rand(20, 5, 1, 1).astype(self.dtype) + self.out = self.x + self.y + + +class TestElementwiseAddOp_rowwise_add_0(TestElementwiseAddOp): + def init_input_output(self): + self.x = np.random.rand(2, 10, 12).astype(self.dtype) + self.y = np.random.rand(10, 12).astype(self.dtype) + self.out = self.x + self.y.reshape(1, 10, 12) + + def init_axis(self): + self.axis = 1 + + +@skip_check_grad_ci( + reason="[skip shape check] Use y_shape(1) to test broadcast.") +class TestElementwiseAddOp_rowwise_add_1(TestElementwiseAddOp): + def init_input_output(self): + self.x = np.random.rand(100, 1).astype(self.dtype) + self.y = np.random.rand(1).astype(self.dtype) + self.out = self.x + self.y.reshape(1, 1) + + def init_axis(self): + self.axis = 1 + + +class TestElementwiseAddOp_channelwise_add(TestElementwiseAddOp): + def init_input_output(self): + self.x = np.random.rand(100, 2, 3).astype(self.dtype) + self.y = np.random.rand(100, 1, 1).astype(self.dtype) + self.out = self.x + self.y + + def init_axis(self): + self.axis = -1 + + +class TestElementwiseAddOp_commonuse_add1(TestElementwiseAddOp): + def init_input_output(self): + self.x = np.random.rand(2, 3, 100).astype(self.dtype) + self.y = np.random.rand(1, 1, 100).astype(self.dtype) + self.out = self.x + self.y + + def 
init_axis(self): + self.axis = -1 + + +class TestElementwiseAddOp_commonuse_add2(TestElementwiseAddOp): + def init_input_output(self): + self.x = np.random.rand(10, 3, 1, 4).astype(self.dtype) + self.y = np.random.rand(10, 1, 12, 1).astype(self.dtype) + self.out = self.x + self.y + + def init_axis(self): + self.axis = -1 + + +class TestElementwiseAddOp_xsize_lessthan_ysize_add(TestElementwiseAddOp): + def init_input_output(self): + self.x = np.random.rand(10, 12).astype(self.dtype) + self.y = np.random.rand(2, 3, 10, 12).astype(self.dtype) + self.out = self.x + self.y + + def init_axis(self): + self.axis = 2 + + +class TestElementwiseAddOpError(unittest.TestCase): + def test_errors(self): + with program_guard(Program(), Program()): + # the input of elementwise_add must be Variable. + x1 = fluid.create_lod_tensor( + np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + y1 = fluid.create_lod_tensor( + np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()) + self.assertRaises(TypeError, fluid.layers.elementwise_add, x1, y1) + + # the input dtype of elementwise_add must be float16 or float32 or float64 or int32 or int64 + # float16 only can be set on GPU place + x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="uint8") + y2 = fluid.layers.data(name='y2', shape=[3, 4, 5, 6], dtype="uint8") + self.assertRaises(TypeError, fluid.layers.elementwise_add, x2, y2) + + +class TestAddOp(unittest.TestCase): + def test_name(self): + with fluid.program_guard(fluid.Program()): + x = fluid.data(name="x", shape=[2, 3], dtype="float32") + y = fluid.data(name='y', shape=[2, 3], dtype='float32') + + y_1 = paddle.add(x, y, name='add_res') + self.assertEqual(('add_res' in y_1.name), True) + + def test_declarative(self): + with fluid.program_guard(fluid.Program()): + + def gen_data(): + return { + "x": np.array([2, 3, 4]).astype('float32'), + "y": np.array([1, 5, 2]).astype('float32') + } + + x = fluid.data(name="x", shape=[3], dtype='float32') + y = 
fluid.data(name="y", shape=[3], dtype='float32') + z = paddle.add(x, y) + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + z_value = exe.run(feed=gen_data(), fetch_list=[z.name]) + z_expected = np.array([3., 8., 6.]) + self.assertEqual((z_value == z_expected).all(), True) + + def test_dygraph(self): + with fluid.dygraph.guard(): + np_x = np.array([2, 3, 4]).astype('float64') + np_y = np.array([1, 5, 2]).astype('float64') + x = fluid.dygraph.to_variable(np_x) + y = fluid.dygraph.to_variable(np_y) + z = paddle.add(x, y) + np_z = z.numpy() + z_expected = np.array([3., 8., 6.]) + self.assertEqual((np_z == z_expected).all(), True) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/xpu/test_matmul_op.py b/python/paddle/fluid/tests/unittests/xpu/test_matmul_op.py new file mode 100644 index 00000000000..ac32d224910 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/xpu/test_matmul_op.py @@ -0,0 +1,355 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import sys +sys.path.append("..") +import paddle.fluid.core as core +import unittest +import numpy as np +from op_test import OpTest +import paddle +import paddle.fluid as fluid +from paddle.fluid import Program, program_guard + + +def generate_compatible_shapes(dim_X, dim_Y, transpose_X, transpose_Y): + BATCH_SIZE = 2 + M = 3 + N = 4 + K = 5 + if (dim_X == 1 and transpose_X) or (dim_Y == 1 and transpose_Y): + K = 1 + if dim_X == 1: + if transpose_X: + shape_X = [M] + else: + shape_X = [K] + if dim_Y == 1: + if transpose_Y: + shape_Y = [N] + else: + shape_Y = [K] + if dim_X >= 2: + if transpose_X: + shape_X = [K, M] + else: + shape_X = [M, K] + if dim_X == 3: + shape_X = [BATCH_SIZE] + shape_X + if dim_Y >= 2: + if transpose_Y: + shape_Y = [N, K] + else: + shape_Y = [K, N] + if dim_Y == 3: + shape_Y = [BATCH_SIZE] + shape_Y + return shape_X, shape_Y + + +def reference_matmul(X, Y, transpose_X=False, transpose_Y=False): + """Reference forward implementation using np.matmul.""" + # np.matmul does not support the transpose flags, so we manually + # transpose X and Y appropriately. + if transpose_X: + if X.ndim == 1: + X = X.reshape((X.size, 1)) + elif X.ndim == 2: + X = X.T + else: + dim = [i for i in range(len(X.shape))] + dim[-1], dim[len(X.shape) - 2] = dim[len(X.shape) - 2], dim[-1] + X = np.transpose(X, tuple(dim)) + if transpose_Y: + if Y.ndim == 1: + Y = Y.reshape((1, Y.size)) + else: + dim = [i for i in range(len(Y.shape))] + dim[-1], dim[len(Y.shape) - 2] = dim[len(Y.shape) - 2], dim[-1] + Y = np.transpose(Y, tuple(dim)) + + Out = np.matmul(X, Y) + if not Out.shape: + # We do not support 0-dimensional Tensors (scalars). So where + # np.matmul outputs a scalar, we must convert to a Tensor of + # shape (1, ) instead. + # Everywhere else, we are compatible with np.matmul. 
+ Out = np.array([Out], dtype="float32") + return Out + + +class Generator(object): + def setUp(self): + self.op_type = "matmul" + X = np.random.random(self.shape_X).astype("float32") + Y = np.random.random(self.shape_Y).astype("float32") + Out = reference_matmul(X, Y, self.transpose_X, self.transpose_Y) + self.inputs = {'X': X, 'Y': Y} + self.attrs = { + 'transpose_X': self.transpose_X, + 'transpose_Y': self.transpose_Y + } + self.outputs = {'Out': Out} + + def test_check_output(self): + self.check_output() + if paddle.is_compiled_with_xpu() and len(self.inputs['X'].shape) == len( + self.inputs['Y'].shape) and self.inputs['X'].shape[ + 0] == self.inputs['Y'].shape[0]: + place = paddle.XPUPlace(0) + self.check_output_with_place(place, atol=1e-3) + + def test_check_grad_normal(self): + self.check_grad(['X', 'Y'], 'Out', max_relative_error=1e-3) + if paddle.is_compiled_with_xpu() and len(self.inputs['X'].shape) == len( + self.inputs['Y'].shape) and self.inputs['X'].shape[ + 0] == self.inputs['Y'].shape[0]: + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['X', 'Y'], 'Out', max_relative_error=5e-2) + + def test_check_grad_ignore_x(self): + self.check_grad( + ['Y'], 'Out', max_relative_error=1e-3, no_grad_set=set("X")) + if paddle.is_compiled_with_xpu() and len(self.inputs['X'].shape) == len( + self.inputs['Y'].shape) and self.inputs['X'].shape[ + 0] == self.inputs['Y'].shape[0]: + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['Y'], + 'Out', + max_relative_error=5e-2, + no_grad_set=set("X")) + + def test_check_grad_ignore_y(self): + self.check_grad( + ['X'], 'Out', max_relative_error=1e-3, no_grad_set=set('Y')) + if paddle.is_compiled_with_xpu() and len(self.inputs['X'].shape) == len( + self.inputs['Y'].shape) and self.inputs['X'].shape[ + 0] == self.inputs['Y'].shape[0]: + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['X'], + 'Out', + max_relative_error=5e-2, + no_grad_set=set('Y')) + + +class 
class TestMatmulOpError(unittest.TestCase):
    """Input validation done by ``fluid.layers.matmul`` while building a graph."""

    def test_errors(self):
        with program_guard(Program(), Program()):
            # The inputs type of matmul_op must be Variable.
            input1 = 12
            self.assertRaises(TypeError, fluid.layers.matmul, input1, input1)
            # The inputs dtype of matmul_op must be float32, float64.
            input2 = fluid.layers.data(
                name='input2', shape=[10, 10], dtype="int32")
            self.assertRaises(TypeError, fluid.layers.matmul, input2, input2)
            # float16 inputs are not wrapped in assertRaises: building the op
            # with them is expected to succeed.
            input3 = fluid.layers.data(
                name='input3', shape=[2, 2], dtype="float16")
            fluid.layers.matmul(input3, input3)


# Negative dimension generation
def generate_negative_dims(in_shape):
    """Return every variant of ``in_shape`` with one or more dims set to -1.

    Variants are ordered by how many dims are replaced (one first, then two,
    ...) and, within that, by the order ``itertools.combinations`` yields the
    replaced positions. For ``[2, 3]`` the result is
    ``[[-1, 3], [2, -1], [-1, -1]]``; an empty shape yields an empty list.
    """
    from itertools import combinations
    size = len(in_shape)
    indexs = list()
    shapes = list()
    for i in range(size):
        # All ways of picking (i + 1) positions to turn into -1.
        indexs.extend(combinations(range(size), i + 1))
    for idx in indexs:
        shapes.append(
            [in_shape[i] if i not in idx else -1 for i in range(size)])
    return shapes


# Build program with inputs sizes that contain negative numbers
def test_negative_dims_program(obj):
    """Run matmul on placeholders whose declared shapes contain -1.

    ``obj`` is a ``unittest.TestCase`` instance carrying ``shape_X``,
    ``shape_Y``, ``transpose_X`` and ``transpose_Y``. For every pair of
    partially unknown declared shapes, the statically inferred output shape
    must agree with the numpy reference wherever it is known, and the
    executed result must match the reference numerically.
    """
    for shape_x in generate_negative_dims(obj.shape_X):
        for shape_y in generate_negative_dims(obj.shape_Y):
            # The fed data always has the fully specified shapes; only the
            # declared placeholder shapes contain -1.
            X = np.random.random(obj.shape_X).astype("float32")
            Y = np.random.random(obj.shape_Y).astype("float32")
            Ref = reference_matmul(X, Y, obj.transpose_X, obj.transpose_Y)
            with program_guard(Program(), Program()):
                x = fluid.data(name='x', shape=shape_x, dtype='float32')
                y = fluid.data(name='y', shape=shape_y, dtype='float32')
                output = fluid.layers.matmul(x, y, obj.transpose_X,
                                             obj.transpose_Y)
                obj.assertEqual(len(Ref.shape), len(output.shape))
                for idx in range(len(Ref.shape)):
                    if output.shape[idx] != -1:
                        obj.assertEqual(Ref.shape[idx], output.shape[idx])
                exe = fluid.Executor(fluid.CPUPlace())
                res, = exe.run(fluid.default_main_program(),
                               feed={'x': X,
                                     'y': Y},
                               fetch_list=[output])
            # The comparison result used to be discarded, so a wrong output
            # could never fail the test; assert it explicitly.
            obj.assertTrue(
                np.allclose(
                    res, Ref, atol=1e-5),
                "matmul output differs from the numpy reference for "
                "declared shapes x={} y={}".format(shape_x, shape_y))


# Generate program api cases for all negative possibilities
def api_test(dim_x, dim_y, trans_x, trans_y):
    """Register a ``unittest.TestCase`` subclass for one API configuration.

    The class is stored in ``globals()`` under a name built from the
    arguments so that unittest discovery finds it.

    NOTE(review): ``generate_compatible_shapes`` is called with four
    arguments here, while the definition further down in this file takes
    three and is only bound after the generation loop has run. This
    presumably relies on a four-argument version defined earlier in the
    file; confirm before reordering anything.
    """
    test_name = ('TestMatMulAPI_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
        dim_x, dim_y, trans_x, trans_y))
    shape_x, shape_y = generate_compatible_shapes(dim_x, dim_y, trans_x,
                                                  trans_y)
    globals()[test_name] = type(test_name, (unittest.TestCase, ), {
        'shape_X': shape_x,
        'shape_Y': shape_y,
        'transpose_X': trans_x,
        'transpose_Y': trans_y,
        'test_propram': test_negative_dims_program,
    })


# Generate operators cases for all possibilities
def inject_test(dim_x, dim_y, trans_x, trans_y):
    """Register a ``(Generator, OpTest)`` test class for one configuration.

    Same naming and registration scheme as ``api_test``, but the generated
    class runs the operator-level output and gradient checks.
    """
    test_name = ('TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
        dim_x, dim_y, trans_x, trans_y))
    shape_x, shape_y = generate_compatible_shapes(dim_x, dim_y, trans_x,
                                                  trans_y)
    globals()[test_name] = type(test_name, (Generator, OpTest), {
        'shape_X': shape_x,
        'shape_Y': shape_y,
        'transpose_X': trans_x,
        'transpose_Y': trans_y,
    })


# Ranks 1-3 for each operand, with and without transposition.
for dim_X in (1, 2, 3):
    for dim_Y in (1, 2, 3):
        for transose_x in (False, True):
            for transose_y in (False, True):
                inject_test(dim_X, dim_Y, transose_x, transose_y)
                api_test(dim_X, dim_Y, transose_x, transose_y)


# Test case n-dim
def generate_compatible_shapes(dim, transpose_X, transpose_Y):
    """Return ``(shape_X, shape_Y)`` for two ``dim``-rank matmul operands.

    Both shapes start with ``dim - 2`` batch dims of size 2. The trailing two
    dims describe an (M=2, K=3) by (K=3, N=4) product, stored swapped for an
    operand whose transpose flag is set.
    """
    M = 2
    N = 4
    K = 3
    shape_X = [2 for _ in range(dim - 2)]
    shape_Y = [2 for _ in range(dim - 2)]

    if transpose_X:
        shape_X += [K, M]
    else:
        shape_X += [M, K]

    if transpose_Y:
        shape_Y += [N, K]
    else:
        shape_Y += [K, N]

    return shape_X, shape_Y


# Test case n-dim
for dim in [4]:
    for transpose_X in [False, True]:
        for transpose_Y in [False, True]:
            test_name = (
                'TestMatMulOp_dimX_{}_dim_Y_{}_transX_{}_transY_{}'.format(
                    dim, dim, transpose_X, transpose_Y))
            shape_X, shape_Y = generate_compatible_shapes(dim, transpose_X,
                                                          transpose_Y)
            globals()[test_name] = type(test_name, (Generator, OpTest), {
                'shape_X': shape_X,
                'shape_Y': shape_Y,
                'transpose_X': transpose_X,
                'transpose_Y': transpose_Y,
            })
class API_TestMm(unittest.TestCase):
    """Checks ``paddle.mm`` in static-graph and dygraph mode."""

    def test_out(self):
        """Static graph: the product of two length-2 vectors matches numpy."""
        with fluid.program_guard(fluid.Program()):
            x = fluid.data(name="x", shape=[2], dtype="float64")
            y = fluid.data(name='y', shape=[2], dtype='float64')
            # Declared but never fetched; kept so the program is built
            # exactly as before.
            res = fluid.data(name="output", shape=[1], dtype="float64")
            result = paddle.mm(x, y)
            exe = fluid.Executor(fluid.CPUPlace())
            data1 = np.random.rand(2)
            data2 = np.random.rand(2)
            feed_dict = {'x': data1, 'y': data2}
            np_res = exe.run(feed=feed_dict, fetch_list=[result])
            row = data1.reshape(1, 2)
            col = data2.reshape(2, 1)
            expected_result = np.matmul(row, col)

        is_close = np.allclose(np_res, expected_result, atol=1e-5)
        self.assertTrue(
            is_close,
            "two value is\
            {}\n{}, check diff!".format(np_res, expected_result))

    def test_dygraph_without_out(self):
        """Dygraph: a (3, 4) by (4, 3) product matches ``np.matmul``."""
        with fluid.dygraph.guard(fluid.CPUPlace()):
            input_array1 = np.random.rand(3, 4).astype("float64")
            input_array2 = np.random.rand(4, 3).astype("float64")
            lhs = fluid.dygraph.to_variable(input_array1)
            rhs = fluid.dygraph.to_variable(input_array2)
            out = paddle.mm(lhs, rhs)
            expected_result = np.matmul(input_array1, input_array2)
            matches = np.allclose(expected_result, out.numpy())
            self.assertTrue(matches)


class Test_API_Matmul(unittest.TestCase):
    """Checks ``paddle.matmul`` in dygraph mode."""

    def test_dygraph_without_out(self):
        """Dygraph: a (3, 4) by (4, 3) product matches ``np.matmul``."""
        with fluid.dygraph.guard(fluid.CPUPlace()):
            input_array1 = np.random.rand(3, 4).astype("float64")
            input_array2 = np.random.rand(4, 3).astype("float64")
            lhs = fluid.dygraph.to_variable(input_array1)
            rhs = fluid.dygraph.to_variable(input_array2)
            out = paddle.matmul(lhs, rhs)
            expected_result = np.matmul(input_array1, input_array2)
            matches = np.allclose(expected_result, out.numpy())
            self.assertTrue(matches)


class API_TestMmError(unittest.TestCase):
    """Shape validation done by ``paddle.mm`` while building a static graph."""

    def test_errors(self):
        def build_mm(shape1, shape2):
            # Build paddle.mm on two float32 placeholders inside a fresh
            # program so the cases do not affect one another.
            with fluid.program_guard(fluid.Program(), fluid.Program()):
                data1 = fluid.data(name="data1", shape=shape1, dtype="float32")
                data2 = fluid.data(name="data2", shape=shape2, dtype="float32")
                paddle.mm(data1, data2)

        # 2-D operands whose inner dimensions differ (2 vs 3) are rejected.
        with self.assertRaises(ValueError):
            build_mm([10, 2], [3, 10])

        # Batch dims declared as -1 on both operands must build without
        # raising.
        build_mm([-1, 10, 2], [-1, 2, 10])

        # Known but different batch sizes (10 vs 3) are rejected.
        with self.assertRaises(ValueError):
            build_mm([10, 10, 2], [3, 2, 10])


if __name__ == "__main__":
    unittest.main()
+ +from __future__ import print_function + +import unittest +import numpy as np +import paddle +import paddle.fluid.core as core +import sys +sys.path.append("..") +from op_test import OpTest +import paddle.fluid as fluid +from paddle.fluid import Program, program_guard + + +class TestMulOp(OpTest): + def setUp(self): + self.op_type = "mul" + self.dtype = np.float64 + self.init_dtype_type() + self.inputs = { + 'X': np.random.random((20, 5)).astype(self.dtype), + 'Y': np.random.random((5, 21)).astype(self.dtype) + } + self.outputs = {'Out': np.dot(self.inputs['X'], self.inputs['Y'])} + + def init_dtype_type(self): + pass + + def test_check_output(self): + self.check_output() + + def test_check_grad_normal(self): + self.check_grad(['X', 'Y'], 'Out') + + def test_check_grad_ingore_x(self): + self.check_grad( + ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X")) + + def test_check_grad_ingore_y(self): + self.check_grad( + ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y')) + + +class TestMulOpError(unittest.TestCase): + def test_errors(self): + with program_guard(Program(), Program()): + # The input type of mul_op must be Variable. + x1 = fluid.create_lod_tensor( + np.array([[-1]]), [[1]], fluid.CPUPlace()) + x2 = fluid.create_lod_tensor( + np.array([[-1]]), [[1]], fluid.CPUPlace()) + self.assertRaises(TypeError, fluid.layers.mul, x1, x2) + # The input dtype of mul_op must be float32 or float64. 
+ x3 = fluid.layers.data(name='x3', shape=[4], dtype="int32") + x4 = fluid.layers.data(name='x4', shape=[4], dtype="int32") + self.assertRaises(TypeError, fluid.layers.mul, x3, x4) + + +class TestMulOp2(OpTest): + def setUp(self): + self.op_type = "mul" + self.dtype = np.float64 + self.init_dtype_type() + self.inputs = { + 'X': np.random.random((3, 4, 2, 9)).astype(self.dtype), + 'Y': np.random.random((3, 6, 1, 2, 3)).astype(self.dtype) + } + self.attrs = { + 'x_num_col_dims': 2, + 'y_num_col_dims': 2, + } + result = np.dot(self.inputs['X'].reshape(3 * 4, 2 * 9), + self.inputs['Y'].reshape(3 * 6, 1 * 2 * 3)) + result = result.reshape(3, 4, 1, 2, 3) + self.outputs = {'Out': result} + + def init_dtype_type(self): + pass + + def test_check_output(self): + self.check_output() + + def test_check_grad_normal(self): + self.check_grad(['X', 'Y'], 'Out') + + def test_check_grad_ingore_x(self): + self.check_grad( + ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set('X')) + + def test_check_grad_ignore_y(self): + self.check_grad( + ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y')) + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class TestXPUMulOp1(TestMulOp): + def init_dtype_type(self): + self.dtype = np.float32 + + def test_check_output(self): + place = paddle.XPUPlace(0) + self.check_output_with_place(place, atol=1e-1) + + def test_check_grad_normal(self): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['X', 'Y'], 'Out', max_relative_error=0.5) + + def test_check_grad_ingore_x(self): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X")) + + def test_check_grad_ingore_y(self): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['X'], 'Out', max_relative_error=0.5, no_grad_set=set('Y')) + + +@unittest.skipIf(not paddle.is_compiled_with_xpu(), + "core is not compiled with XPU") +class 
TestXPUMulOp2(TestMulOp2): + def init_dtype_type(self): + self.dtype = np.float32 + + def test_check_output(self): + place = paddle.XPUPlace(0) + self.check_output_with_place(place, atol=2e-1) + + def test_check_grad_normal(self): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['X', 'Y'], 'Out', max_relative_error=0.9) + + def test_check_grad_ingore_x(self): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['Y'], 'Out', max_relative_error=0.5, no_grad_set=set("X")) + + def test_check_grad_ingore_y(self): + place = paddle.XPUPlace(0) + self.check_grad_with_place( + place, ['X'], 'Out', max_relative_error=0.9, no_grad_set=set('Y')) + + +if __name__ == "__main__": + unittest.main() diff --git a/tools/wlist.json b/tools/wlist.json index 9b36ac6adc7..3ca14cd1dd6 100644 --- a/tools/wlist.json +++ b/tools/wlist.json @@ -407,7 +407,9 @@ "TransformerDecoder.prepare_incremental_cache", "LinearChainCRF.forward", "CRFDecoding.forward", - "SequenceTagging.forward" + "SequenceTagging.forward", + "XPUPlace", + "is_compiled_with_xpu" ], "gpu_not_white":[ "deformable_conv", -- GitLab