Unverified commit 1b5a1e26, authored by jm_12138, committed by GitHub

update modnet_resnet50vd_matting (#2100)

* add requirements.txt

* add init

* update format
Parent 755425ce
@@ -11,33 +11,32 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import argparse
 import os
 import time
-import argparse
-from typing import Callable, Union, List, Tuple
+from typing import Callable
+from typing import List
+from typing import Union
 
-import numpy as np
 import cv2
-import scipy
+import modnet_resnet50vd_matting.processor as P
+import numpy as np
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
-from paddlehub.module.module import moduleinfo
-import paddlehub.vision.segmentation_transforms as T
-from paddlehub.module.module import moduleinfo, runnable, serving
+import scipy
 
 from modnet_resnet50vd_matting.resnet import ResNet50_vd
-import modnet_resnet50vd_matting.processor as P
+from paddlehub.module.module import moduleinfo
+from paddlehub.module.module import runnable
+from paddlehub.module.module import serving
 
 
-@moduleinfo(
-    name="modnet_resnet50vd_matting",
-    type="CV/matting",
-    author="paddlepaddle",
-    summary="modnet_resnet50vd_matting is a matting model",
-    version="1.0.0"
-)
+@moduleinfo(name="modnet_resnet50vd_matting",
+            type="CV/matting",
+            author="paddlepaddle",
+            summary="modnet_resnet50vd_matting is a matting model",
+            version="1.0.0")
 class MODNetResNet50Vd(nn.Layer):
     """
     The MODNet implementation based on PaddlePaddle.
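For readers unfamiliar with the `@moduleinfo` registration above: once installed, a PaddleHub module is loaded by the name it registers. A minimal usage sketch (the image path is a placeholder):

```python
import paddlehub as hub

# Load the matting module by its registered name.
model = hub.Module(name="modnet_resnet50vd_matting")

# predict() is defined below in this diff; it returns one alpha matte per input image.
results = model.predict(image_list=["/PATH/TO/IMAGE.jpg"])
```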
@@ -51,14 +50,13 @@ class MODNetResNet50Vd(nn.Layer):
         pretrained(str, optional): The path of pretrained model. Default: None.
     """
 
-    def __init__(self, hr_channels:int = 32, pretrained=None):
+    def __init__(self, hr_channels: int = 32, pretrained=None):
         super(MODNetResNet50Vd, self).__init__()
         self.backbone = ResNet50_vd()
         self.pretrained = pretrained
-        self.head = MODNetHead(
-            hr_channels=hr_channels, backbone_channels=self.backbone.feat_channels)
+        self.head = MODNetHead(hr_channels=hr_channels, backbone_channels=self.backbone.feat_channels)
 
         self.blurer = GaussianBlurLayer(1, 3)
         self.transforms = P.Compose([P.LoadImages(), P.ResizeByShort(), P.ResizeToIntMult(), P.Normalize()])
@@ -73,14 +71,14 @@ class MODNetResNet50Vd(nn.Layer):
             self.set_dict(model_dict)
             print("load pretrained parameters success")
 
-    def preprocess(self, img: Union[str, np.ndarray] , transforms: Callable, trimap: Union[str, np.ndarray] = None):
+    def preprocess(self, img: Union[str, np.ndarray], transforms: Callable, trimap: Union[str, np.ndarray] = None):
         data = {}
         data['img'] = img
         if trimap is not None:
             data['trimap'] = trimap
             data['gt_fields'] = ['trimap']
         data['trans_info'] = []
-        data = self.transforms(data)
+        data = transforms(data)
         data['img'] = paddle.to_tensor(data['img'])
         data['img'] = data['img'].unsqueeze(0)
         if trimap is not None:
@@ -95,9 +93,13 @@ class MODNetResNet50Vd(nn.Layer):
         y = self.head(inputs=inputs, feat_list=feat_list)
         return y
 
-    def predict(self, image_list: list, trimap_list: list = None, visualization: bool =False, save_path: str = "modnet_resnet50vd_matting_output"):
+    def predict(self,
+                image_list: list,
+                trimap_list: list = None,
+                visualization: bool = False,
+                save_path: str = "modnet_resnet50vd_matting_output"):
         self.eval()
-        result= []
+        result = []
         with paddle.no_grad():
             for i, im_path in enumerate(image_list):
                 trimap = trimap_list[i] if trimap_list is not None else None
@@ -118,7 +120,7 @@ class MODNetResNet50Vd(nn.Layer):
         return result
 
     @serving
-    def serving_method(self, images: list, trimaps:list = None, **kwargs):
+    def serving_method(self, images: list, trimaps: list = None, **kwargs):
         """
         Run as a service.
         """
@@ -128,7 +130,7 @@ class MODNetResNet50Vd(nn.Layer):
         else:
             trimap_decoder = None
 
-        outputs = self.predict(image_list=images_decode, trimap_list= trimap_decoder, **kwargs)
+        outputs = self.predict(image_list=images_decode, trimap_list=trimap_decoder, **kwargs)
 
         serving_data = [P.cv2_to_base64(outputs[i]) for i in range(len(outputs))]
         results = {'data': serving_data}
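The serving path expects base64-encoded images and returns base64-encoded mattes (via `cv2_to_base64`). A client sketch, assuming the service was started with `hub serving start -m modnet_resnet50vd_matting` on the default port 8866 (host, port, and image path are placeholders):

```python
import base64
import json

import cv2
import numpy as np
import requests

def cv2_to_base64(image: np.ndarray) -> str:
    # Encode an image the same way the module decodes it on the server side.
    data = cv2.imencode('.jpg', image)[1]
    return base64.b64encode(data.tobytes()).decode('utf8')

org_im = cv2.imread('/PATH/TO/IMAGE.jpg')
payload = {'images': [cv2_to_base64(org_im)]}   # keyword matches serving_method(images=...)
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:8866/predict/modnet_resnet50vd_matting"
r = requests.post(url=url, headers=headers, data=json.dumps(payload))
# The module's {'data': [...]} dict arrives under the usual PaddleHub "results" wrapper.
print(r.json()["results"])
```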
@@ -139,8 +141,7 @@ class MODNetResNet50Vd(nn.Layer):
         """
         Run as a command.
         """
-        self.parser = argparse.ArgumentParser(
-            description="Run the {} module.".format(self.name),
-            prog='hub run {}'.format(self.name),
-            usage='%(prog)s',
-            add_help=True)
+        self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name),
+                                              prog='hub run {}'.format(self.name),
+                                              usage='%(prog)s',
+                                              add_help=True)
@@ -155,7 +156,10 @@ class MODNetResNet50Vd(nn.Layer):
         else:
             trimap_list = None
 
-        results = self.predict(image_list=[args.input_path], trimap_list=trimap_list, save_path=args.output_dir, visualization=args.visualization)
+        results = self.predict(image_list=[args.input_path],
+                               trimap_list=trimap_list,
+                               save_path=args.output_dir,
+                               visualization=args.visualization)
 
         return results
@@ -164,10 +168,14 @@ class MODNetResNet50Vd(nn.Layer):
         Add the command config options.
         """
 
-        self.arg_config_group.add_argument(
-            '--output_dir', type=str, default="modnet_resnet50vd_matting_output", help="The directory to save output images.")
-        self.arg_config_group.add_argument(
-            '--visualization', type=bool, default=True, help="whether to save output as images.")
+        self.arg_config_group.add_argument('--output_dir',
+                                           type=str,
+                                           default="modnet_resnet50vd_matting_output",
+                                           help="The directory to save output images.")
+        self.arg_config_group.add_argument('--visualization',
+                                           type=bool,
+                                           default=True,
+                                           help="whether to save output as images.")
 
     def add_module_input_arg(self):
         """
@@ -177,11 +185,11 @@ class MODNetResNet50Vd(nn.Layer):
         self.arg_input_group.add_argument('--trimap_path', type=str, default=None, help="path to trimap.")
 
 
 class MODNetHead(nn.Layer):
     """
     Segmentation head.
     """
 
     def __init__(self, hr_channels: int, backbone_channels: int):
         super().__init__()
@@ -196,37 +204,24 @@ class MODNetHead(nn.Layer):
         return pred_matte
 
 
 class FusionBranch(nn.Layer):
     def __init__(self, hr_channels: int, enc_channels: int):
         super().__init__()
-        self.conv_lr4x = Conv2dIBNormRelu(
-            enc_channels[2], hr_channels, 5, stride=1, padding=2)
+        self.conv_lr4x = Conv2dIBNormRelu(enc_channels[2], hr_channels, 5, stride=1, padding=2)
 
-        self.conv_f2x = Conv2dIBNormRelu(
-            2 * hr_channels, hr_channels, 3, stride=1, padding=1)
+        self.conv_f2x = Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1)
         self.conv_f = nn.Sequential(
-            Conv2dIBNormRelu(
-                hr_channels + 3, int(hr_channels / 2), 3, stride=1, padding=1),
-            Conv2dIBNormRelu(
-                int(hr_channels / 2),
-                1,
-                1,
-                stride=1,
-                padding=0,
-                with_ibn=False,
-                with_relu=False))
+            Conv2dIBNormRelu(hr_channels + 3, int(hr_channels / 2), 3, stride=1, padding=1),
+            Conv2dIBNormRelu(int(hr_channels / 2), 1, 1, stride=1, padding=0, with_ibn=False, with_relu=False))
 
     def forward(self, img: paddle.Tensor, lr8x: paddle.Tensor, hr2x: paddle.Tensor) -> paddle.Tensor:
-        lr4x = F.interpolate(
-            lr8x, scale_factor=2, mode='bilinear', align_corners=False)
+        lr4x = F.interpolate(lr8x, scale_factor=2, mode='bilinear', align_corners=False)
         lr4x = self.conv_lr4x(lr4x)
-        lr2x = F.interpolate(
-            lr4x, scale_factor=2, mode='bilinear', align_corners=False)
+        lr2x = F.interpolate(lr4x, scale_factor=2, mode='bilinear', align_corners=False)
 
         f2x = self.conv_f2x(paddle.concat((lr2x, hr2x), axis=1))
-        f = F.interpolate(
-            f2x, scale_factor=2, mode='bilinear', align_corners=False)
+        f = F.interpolate(f2x, scale_factor=2, mode='bilinear', align_corners=False)
         f = self.conv_f(paddle.concat((f, img), axis=1))
         pred_matte = F.sigmoid(f)
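Each `F.interpolate(..., scale_factor=2)` call in the fusion branch doubles the spatial resolution, walking the low-resolution features from 1/8 scale back to full resolution in three steps. A quick shape check with illustrative values:

```python
import paddle
import paddle.nn.functional as F

x = paddle.rand([1, 32, 64, 64])   # NCHW feature map at 1/8 scale
y = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=False)
print(y.shape)                     # [1, 32, 128, 128] -> now at 1/4 scale
```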
@@ -238,56 +233,33 @@ class HRBranch(nn.Layer):
     High Resolution Branch of MODNet
     """
 
-    def __init__(self, hr_channels: int, enc_channels:int):
+    def __init__(self, hr_channels: int, enc_channels: int):
         super().__init__()
 
-        self.tohr_enc2x = Conv2dIBNormRelu(
-            enc_channels[0], hr_channels, 1, stride=1, padding=0)
-        self.conv_enc2x = Conv2dIBNormRelu(
-            hr_channels + 3, hr_channels, 3, stride=2, padding=1)
+        self.tohr_enc2x = Conv2dIBNormRelu(enc_channels[0], hr_channels, 1, stride=1, padding=0)
+        self.conv_enc2x = Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=2, padding=1)
 
-        self.tohr_enc4x = Conv2dIBNormRelu(
-            enc_channels[1], hr_channels, 1, stride=1, padding=0)
-        self.conv_enc4x = Conv2dIBNormRelu(
-            2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1)
+        self.tohr_enc4x = Conv2dIBNormRelu(enc_channels[1], hr_channels, 1, stride=1, padding=0)
+        self.conv_enc4x = Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1)
 
         self.conv_hr4x = nn.Sequential(
-            Conv2dIBNormRelu(
-                2 * hr_channels + enc_channels[2] + 3,
-                2 * hr_channels,
-                3,
-                stride=1,
-                padding=1),
-            Conv2dIBNormRelu(
-                2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
-            Conv2dIBNormRelu(
-                2 * hr_channels, hr_channels, 3, stride=1, padding=1))
+            Conv2dIBNormRelu(2 * hr_channels + enc_channels[2] + 3, 2 * hr_channels, 3, stride=1, padding=1),
+            Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
+            Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1))
 
-        self.conv_hr2x = nn.Sequential(
-            Conv2dIBNormRelu(
-                2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
-            Conv2dIBNormRelu(
-                2 * hr_channels, hr_channels, 3, stride=1, padding=1),
-            Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
-            Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1))
+        self.conv_hr2x = nn.Sequential(Conv2dIBNormRelu(2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
+                                       Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1),
+                                       Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
+                                       Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1))
 
         self.conv_hr = nn.Sequential(
-            Conv2dIBNormRelu(
-                hr_channels + 3, hr_channels, 3, stride=1, padding=1),
-            Conv2dIBNormRelu(
-                hr_channels,
-                1,
-                1,
-                stride=1,
-                padding=0,
-                with_ibn=False,
-                with_relu=False))
+            Conv2dIBNormRelu(hr_channels + 3, hr_channels, 3, stride=1, padding=1),
+            Conv2dIBNormRelu(hr_channels, 1, 1, stride=1, padding=0, with_ibn=False, with_relu=False))
 
-    def forward(self, img: paddle.Tensor, enc2x: paddle.Tensor, enc4x: paddle.Tensor, lr8x: paddle.Tensor) -> paddle.Tensor:
-        img2x = F.interpolate(
-            img, scale_factor=1 / 2, mode='bilinear', align_corners=False)
-        img4x = F.interpolate(
-            img, scale_factor=1 / 4, mode='bilinear', align_corners=False)
+    def forward(self, img: paddle.Tensor, enc2x: paddle.Tensor, enc4x: paddle.Tensor,
+                lr8x: paddle.Tensor) -> paddle.Tensor:
+        img2x = F.interpolate(img, scale_factor=1 / 2, mode='bilinear', align_corners=False)
+        img4x = F.interpolate(img, scale_factor=1 / 4, mode='bilinear', align_corners=False)
 
         enc2x = self.tohr_enc2x(enc2x)
         hr4x = self.conv_enc2x(paddle.concat((img2x, enc2x), axis=1))
@@ -295,12 +267,10 @@ class HRBranch(nn.Layer):
         enc4x = self.tohr_enc4x(enc4x)
         hr4x = self.conv_enc4x(paddle.concat((hr4x, enc4x), axis=1))
 
-        lr4x = F.interpolate(
-            lr8x, scale_factor=2, mode='bilinear', align_corners=False)
+        lr4x = F.interpolate(lr8x, scale_factor=2, mode='bilinear', align_corners=False)
         hr4x = self.conv_hr4x(paddle.concat((hr4x, lr4x, img4x), axis=1))
 
-        hr2x = F.interpolate(
-            hr4x, scale_factor=2, mode='bilinear', align_corners=False)
+        hr2x = F.interpolate(hr4x, scale_factor=2, mode='bilinear', align_corners=False)
         hr2x = self.conv_hr2x(paddle.concat((hr2x, enc2x), axis=1))
 
         pred_detail = None
         return pred_detail, hr2x
@@ -310,15 +280,13 @@ class LRBranch(nn.Layer):
     """
     Low Resolution Branch of MODNet
     """
 
     def __init__(self, backbone_channels: int):
         super().__init__()
         self.se_block = SEBlock(backbone_channels[4], reduction=4)
-        self.conv_lr16x = Conv2dIBNormRelu(
-            backbone_channels[4], backbone_channels[3], 5, stride=1, padding=2)
-        self.conv_lr8x = Conv2dIBNormRelu(
-            backbone_channels[3], backbone_channels[2], 5, stride=1, padding=2)
-        self.conv_lr = Conv2dIBNormRelu(
-            backbone_channels[2],
-            1,
-            3,
-            stride=2,
+        self.conv_lr16x = Conv2dIBNormRelu(backbone_channels[4], backbone_channels[3], 5, stride=1, padding=2)
+        self.conv_lr8x = Conv2dIBNormRelu(backbone_channels[3], backbone_channels[2], 5, stride=1, padding=2)
+        self.conv_lr = Conv2dIBNormRelu(backbone_channels[2],
+                                        1,
+                                        3,
+                                        stride=2,
@@ -330,11 +298,9 @@ class LRBranch(nn.Layer):
         enc2x, enc4x, enc32x = feat_list[0], feat_list[1], feat_list[4]
 
         enc32x = self.se_block(enc32x)
-        lr16x = F.interpolate(
-            enc32x, scale_factor=2, mode='bilinear', align_corners=False)
+        lr16x = F.interpolate(enc32x, scale_factor=2, mode='bilinear', align_corners=False)
         lr16x = self.conv_lr16x(lr16x)
-        lr8x = F.interpolate(
-            lr16x, scale_factor=2, mode='bilinear', align_corners=False)
+        lr8x = F.interpolate(lr16x, scale_factor=2, mode='bilinear', align_corners=False)
         lr8x = self.conv_lr8x(lr8x)
 
         pred_semantic = None
@@ -376,7 +342,7 @@ class Conv2dIBNormRelu(nn.Layer):
                  kernel_size: int,
                  stride: int = 1,
                  padding: int = 0,
-                 dilation:int = 1,
+                 dilation: int = 1,
                  groups: int = 1,
                  bias_attr: paddle.ParamAttr = None,
                  with_ibn: bool = True,
@@ -385,8 +351,7 @@ class Conv2dIBNormRelu(nn.Layer):
         super().__init__()
 
         layers = [
-            nn.Conv2D(
-                in_channels,
-                out_channels,
-                kernel_size,
-                stride=stride,
+            nn.Conv2D(in_channels,
+                      out_channels,
+                      kernel_size,
+                      stride=stride,
@@ -413,20 +378,13 @@ class SEBlock(nn.Layer):
     SE Block Proposed in https://arxiv.org/pdf/1709.01507.pdf
     """
 
-    def __init__(self, num_channels: int, reduction:int = 1):
+    def __init__(self, num_channels: int, reduction: int = 1):
         super().__init__()
         self.pool = nn.AdaptiveAvgPool2D(1)
-        self.conv = nn.Sequential(
-            nn.Conv2D(
-                num_channels,
-                int(num_channels // reduction),
-                1,
-                bias_attr=False), nn.ReLU(),
-            nn.Conv2D(
-                int(num_channels // reduction),
-                num_channels,
-                1,
-                bias_attr=False), nn.Sigmoid())
+        self.conv = nn.Sequential(nn.Conv2D(num_channels, int(num_channels // reduction), 1,
+                                            bias_attr=False), nn.ReLU(),
+                                  nn.Conv2D(int(num_channels // reduction), num_channels, 1, bias_attr=False),
+                                  nn.Sigmoid())
 
     def forward(self, x: paddle.Tensor) -> paddle.Tensor:
         w = self.pool(x)
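SEBlock is a standard squeeze-and-excitation block: global average pooling squeezes each channel to a single value, the two 1x1 convolutions map those values to a per-channel weight in (0, 1), and the input is rescaled channel-wise by that weight. Shape-wise, with illustrative values:

```python
import paddle
import paddle.nn as nn

x = paddle.rand([1, 2048, 16, 16])   # e.g. the enc32x feature map
pool = nn.AdaptiveAvgPool2D(1)
w = pool(x)
print(w.shape)   # [1, 2048, 1, 1] -- one scalar per channel, broadcast back over x
```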
@@ -454,14 +412,7 @@ class GaussianBlurLayer(nn.Layer):
         self.op = nn.Sequential(
             nn.Pad2D(int(self.kernel_size / 2), mode='reflect'),
-            nn.Conv2D(
-                channels,
-                channels,
-                self.kernel_size,
-                stride=1,
-                padding=0,
-                bias_attr=False,
-                groups=channels))
+            nn.Conv2D(channels, channels, self.kernel_size, stride=1, padding=0, bias_attr=False, groups=channels))
 
         self._init_kernel()
         self.op[1].weight.stop_gradient = True
@@ -479,8 +430,7 @@ class GaussianBlurLayer(nn.Layer):
             exit()
         elif not x.shape[1] == self.channels:
             print('In \'GaussianBlurLayer\', the required channel ({0}) is'
-                  'not the same as input ({1})\n'.format(
-                      self.channels, x.shape[1]))
+                  'not the same as input ({1})\n'.format(self.channels, x.shape[1]))
             exit()
 
         return self.op(x)
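The blur itself is a fixed depthwise convolution (`groups=channels`) whose weights `_init_kernel` fills with a normalized Gaussian, so each channel is blurred independently and gradients never flow into the kernel. A sketch of how such a kernel can be built; this reconstructs the idea, not necessarily the exact upstream code:

```python
import cv2
import numpy as np

kernel_size = 3
k1d = cv2.getGaussianKernel(kernel_size, 0)   # sigma=0 lets OpenCV derive it from the size
k2d = (k1d @ k1d.T).astype(np.float32)        # separable outer product -> 2-D Gaussian
print(k2d.sum())                              # ~1.0: blurring preserves total intensity
```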
......
@@ -11,17 +11,17 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import random
 import base64
-from typing import Callable, Union, List, Tuple
+from typing import Callable
+from typing import List
+from typing import Tuple
+from typing import Union
 
 import cv2
 import numpy as np
 import paddle
 import paddle.nn.functional as F
 from paddleseg.transforms import functional
-from PIL import Image
 
 
 class Compose:
@@ -61,6 +61,7 @@ class LoadImages:
     Args:
         to_rgb (bool, optional): If converting image to RGB color space. Default: True.
     """
+
     def __init__(self, to_rgb: bool = True):
         self.to_rgb = to_rgb
@@ -95,7 +96,7 @@ class ResizeByShort:
         short_size (int): The target size of short side.
     """
 
-    def __init__(self, short_size: int =512):
+    def __init__(self, short_size: int = 512):
         self.short_size = short_size
 
     def __call__(self, data: dict) -> dict:
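ResizeByShort scales an image so its shorter side equals `short_size` while preserving the aspect ratio (the transform delegates the actual resize to paddleseg's functional helpers). The arithmetic, with illustrative numbers:

```python
h, w, short_size = 720, 1280, 512
scale = short_size / min(h, w)
new_w, new_h = int(round(w * scale)), int(round(h * scale))
print(new_w, new_h)   # 910 512 -- the short side lands exactly on short_size
```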
@@ -140,14 +141,13 @@ class Normalize:
         ValueError: When mean/std is not list or any value in std is 0.
     """
 
-    def __init__(self, mean: Union[List[float], Tuple[float]] = (0.5, 0.5, 0.5), std: Union[List[float], Tuple[float]] = (0.5, 0.5, 0.5)):
+    def __init__(self,
+                 mean: Union[List[float], Tuple[float]] = (0.5, 0.5, 0.5),
+                 std: Union[List[float], Tuple[float]] = (0.5, 0.5, 0.5)):
         self.mean = mean
         self.std = std
-        if not (isinstance(self.mean, (list, tuple))
-                and isinstance(self.std, (list, tuple))):
-            raise ValueError(
-                "{}: input type is invalid. It should be list or tuple".format(
-                    self))
+        if not (isinstance(self.mean, (list, tuple)) and isinstance(self.std, (list, tuple))):
+            raise ValueError("{}: input type is invalid. It should be list or tuple".format(self))
         from functools import reduce
         if reduce(lambda x, y: x * y, self.std) == 0:
             raise ValueError('{}: std is invalid!'.format(self))
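With the default mean/std of 0.5, normalization maps pixel values scaled to [0, 1] onto [-1, 1]; a zero anywhere in std would divide by zero, which is what the `reduce` product check guards against. Quick arithmetic check:

```python
import numpy as np

px = np.array([0.0, 0.5, 1.0])   # pixel values already scaled to [0, 1]
print((px - 0.5) / 0.5)          # [-1.  0.  1.]
```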
@@ -177,6 +177,7 @@ def reverse_transform(alpha: paddle.Tensor, trans_info: List[str]):
             raise Exception("Unexpected info '{}' in im_info".format(item[0]))
     return alpha
 
+
 def save_alpha_pred(alpha: np.ndarray, trimap: np.ndarray = None):
     """
     The value of alpha is range [0, 1], shape should be [h,w]
......
@@ -11,13 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
 from paddleseg.models import layers
-from paddleseg.utils import utils
 
 __all__ = ["ResNet50_vd"]
@@ -39,10 +36,8 @@ class ConvBNLayer(nn.Layer):
         super(ConvBNLayer, self).__init__()
 
         self.is_vd_mode = is_vd_mode
-        self._pool2d_avg = nn.AvgPool2D(
-            kernel_size=2, stride=2, padding=0, ceil_mode=True)
-        self._conv = nn.Conv2D(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=kernel_size,
-            stride=stride,
+        self._pool2d_avg = nn.AvgPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True)
+        self._conv = nn.Conv2D(in_channels=in_channels,
+                               out_channels=out_channels,
+                               kernel_size=kernel_size,
+                               stride=stride,
@@ -76,30 +71,20 @@ class BottleneckBlock(nn.Layer):
                  dilation: int = 1):
         super(BottleneckBlock, self).__init__()
 
-        self.conv0 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=1,
-            act='relu')
+        self.conv0 = ConvBNLayer(in_channels=in_channels, out_channels=out_channels, kernel_size=1, act='relu')
 
         self.dilation = dilation
 
-        self.conv1 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            stride=stride,
-            act='relu',
-            dilation=dilation)
-        self.conv2 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels * 4,
-            kernel_size=1,
-            act=None)
+        self.conv1 = ConvBNLayer(in_channels=out_channels,
+                                 out_channels=out_channels,
+                                 kernel_size=3,
+                                 stride=stride,
+                                 act='relu',
+                                 dilation=dilation)
+        self.conv2 = ConvBNLayer(in_channels=out_channels, out_channels=out_channels * 4, kernel_size=1, act=None)
 
         if not shortcut:
-            self.short = ConvBNLayer(
-                in_channels=in_channels,
-                out_channels=out_channels * 4,
-                kernel_size=1,
-                stride=1,
+            self.short = ConvBNLayer(in_channels=in_channels,
+                                     out_channels=out_channels * 4,
+                                     kernel_size=1,
+                                     stride=1,
@@ -133,29 +118,19 @@
 class BasicBlock(nn.Layer):
     """Basic residual block"""
 
-    def __init__(self,
-                 in_channels: int,
-                 out_channels: int,
-                 stride: int,
-                 shortcut: bool = True,
-                 if_first: bool = False):
+    def __init__(self, in_channels: int, out_channels: int, stride: int, shortcut: bool = True, if_first: bool = False):
         super(BasicBlock, self).__init__()
         self.stride = stride
-        self.conv0 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            stride=stride,
-            act='relu')
-        self.conv1 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            act=None)
+        self.conv0 = ConvBNLayer(in_channels=in_channels,
+                                 out_channels=out_channels,
+                                 kernel_size=3,
+                                 stride=stride,
+                                 act='relu')
+        self.conv1 = ConvBNLayer(in_channels=out_channels, out_channels=out_channels, kernel_size=3, act=None)
 
         if not shortcut:
-            self.short = ConvBNLayer(
-                in_channels=in_channels,
-                out_channels=out_channels,
-                kernel_size=1,
-                stride=1,
+            self.short = ConvBNLayer(in_channels=in_channels,
+                                     out_channels=out_channels,
+                                     kernel_size=1,
+                                     stride=1,
@@ -212,13 +187,11 @@ class ResNet_vd(nn.Layer):
             depth = [3, 8, 36, 3]
         elif layers == 200:
             depth = [3, 12, 48, 3]
-        num_channels = [64, 256, 512, 1024
-                        ] if layers >= 50 else [64, 64, 128, 256]
+        num_channels = [64, 256, 512, 1024] if layers >= 50 else [64, 64, 128, 256]
         num_filters = [64, 128, 256, 512]
 
         # for channels of four returned stages
-        self.feat_channels = [c * 4 for c in num_filters
-                              ] if layers >= 50 else num_filters
+        self.feat_channels = [c * 4 for c in num_filters] if layers >= 50 else num_filters
         self.feat_channels = [64] + self.feat_channels
 
         dilation_dict = None
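For the 50-layer variant used here, the bottleneck expansion factor of 4 yields the five stage widths that the MODNet head consumes as `backbone_channels`. Concretely:

```python
num_filters = [64, 128, 256, 512]
feat_channels = [64] + [c * 4 for c in num_filters]   # stem + four bottleneck stages
print(feat_channels)   # [64, 256, 512, 1024, 2048]
```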
@@ -227,24 +200,9 @@ class ResNet_vd(nn.Layer):
         elif output_stride == 16:
             dilation_dict = {3: 2}
 
-        self.conv1_1 = ConvBNLayer(
-            in_channels=input_channels,
-            out_channels=32,
-            kernel_size=3,
-            stride=2,
-            act='relu')
-        self.conv1_2 = ConvBNLayer(
-            in_channels=32,
-            out_channels=32,
-            kernel_size=3,
-            stride=1,
-            act='relu')
-        self.conv1_3 = ConvBNLayer(
-            in_channels=32,
-            out_channels=64,
-            kernel_size=3,
-            stride=1,
-            act='relu')
+        self.conv1_1 = ConvBNLayer(in_channels=input_channels, out_channels=32, kernel_size=3, stride=2, act='relu')
+        self.conv1_2 = ConvBNLayer(in_channels=32, out_channels=32, kernel_size=3, stride=1, act='relu')
+        self.conv1_3 = ConvBNLayer(in_channels=32, out_channels=64, kernel_size=3, stride=1, act='relu')
         self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
 
         # self.block_list = []
@@ -264,8 +222,7 @@ class ResNet_vd(nn.Layer):
                     ###############################################################################
                     # Add dilation rate for some segmentation tasks, if dilation_dict is not None.
-                    dilation_rate = dilation_dict[
-                        block] if dilation_dict and block in dilation_dict else 1
+                    dilation_rate = dilation_dict[block] if dilation_dict and block in dilation_dict else 1
 
                     # Actually block here is 'stage', and i is 'block' in 'stage'
                     # At the stage 4, expand the dilation_rate if given multi_grid
@@ -275,12 +232,9 @@ class ResNet_vd(nn.Layer):
                     bottleneck_block = self.add_sublayer(
                         'bb_%d_%d' % (block, i),
-                        BottleneckBlock(
-                            in_channels=num_channels[block]
-                            if i == 0 else num_filters[block] * 4,
-                            out_channels=num_filters[block],
-                            stride=2 if i == 0 and block != 0
-                            and dilation_rate == 1 else 1,
-                            shortcut=shortcut,
-                            if_first=block == i == 0,
-                            dilation=dilation_rate))
+                        BottleneckBlock(in_channels=num_channels[block] if i == 0 else num_filters[block] * 4,
+                                        out_channels=num_filters[block],
+                                        stride=2 if i == 0 and block != 0 and dilation_rate == 1 else 1,
+                                        shortcut=shortcut,
+                                        if_first=block == i == 0,
+                                        dilation=dilation_rate))
@@ -296,9 +250,7 @@ class ResNet_vd(nn.Layer):
                     conv_name = "res" + str(block + 2) + chr(97 + i)
                     basic_block = self.add_sublayer(
                         'bb_%d_%d' % (block, i),
-                        BasicBlock(
-                            in_channels=num_channels[block]
-                            if i == 0 else num_filters[block],
-                            out_channels=num_filters[block],
-                            stride=2 if i == 0 and block != 0 else 1,
-                            shortcut=shortcut,
+                        BasicBlock(in_channels=num_channels[block] if i == 0 else num_filters[block],
+                                   out_channels=num_filters[block],
+                                   stride=2 if i == 0 and block != 0 else 1,
+                                   shortcut=shortcut,
......