From 630cb60ae37b9690b1c15daee1b6678a09738562 Mon Sep 17 00:00:00 2001 From: littletomatodonkey Date: Wed, 16 Sep 2020 07:26:21 +0000 Subject: [PATCH] fix dropout --- ppcls/modeling/architectures/alexnet.py | 4 ++-- ppcls/modeling/architectures/densenet.py | 2 +- ppcls/modeling/architectures/googlenet.py | 6 +++--- ppcls/modeling/architectures/inception_v4.py | 2 +- ppcls/modeling/architectures/mobilenet_v3.py | 15 +++++++++++---- ppcls/modeling/architectures/squeezenet.py | 2 +- ppcls/modeling/architectures/vgg.py | 2 +- ppcls/modeling/architectures/xception_deeplab.py | 2 +- tools/eval.py | 14 ++++++++------ 9 files changed, 29 insertions(+), 20 deletions(-) diff --git a/ppcls/modeling/architectures/alexnet.py b/ppcls/modeling/architectures/alexnet.py index ce6a77b9..337a7113 100644 --- a/ppcls/modeling/architectures/alexnet.py +++ b/ppcls/modeling/architectures/alexnet.py @@ -83,7 +83,7 @@ class AlexNetDY(nn.Layer): 256, 256, 3, 1, 1, stdv, act="relu", name="conv5") stdv = 1.0 / math.sqrt(256 * 6 * 6) - self._drop1 = Dropout(p=0.5) + self._drop1 = Dropout(p=0.5, mode="downscale_in_infer") self._fc6 = Linear( in_features=256 * 6 * 6, out_features=4096, @@ -92,7 +92,7 @@ class AlexNetDY(nn.Layer): bias_attr=ParamAttr( name="fc6_offset", initializer=Uniform(-stdv, stdv))) - self._drop2 = Dropout(p=0.5) + self._drop2 = Dropout(p=0.5, mode="downscale_in_infer") self._fc7 = Linear( in_features=4096, out_features=4096, diff --git a/ppcls/modeling/architectures/densenet.py b/ppcls/modeling/architectures/densenet.py index f34fb83a..7179073c 100644 --- a/ppcls/modeling/architectures/densenet.py +++ b/ppcls/modeling/architectures/densenet.py @@ -89,7 +89,7 @@ class DenseLayer(nn.Layer): name=name + "_x2") if dropout: - self.dropout_func = Dropout(p=dropout) + self.dropout_func = Dropout(p=dropout, mode="downscale_in_infer") def forward(self, input): conv = self.bn_ac_func1(input) diff --git a/ppcls/modeling/architectures/googlenet.py b/ppcls/modeling/architectures/googlenet.py index 45071ab3..2e990a7f 100644 --- a/ppcls/modeling/architectures/googlenet.py +++ b/ppcls/modeling/architectures/googlenet.py @@ -126,7 +126,7 @@ class GoogleNetDY(nn.Layer): self._pool_5 = AvgPool2d(kernel_size=7, stride=7) - self._drop = Dropout(p=0.4) + self._drop = Dropout(p=0.4, mode="downscale_in_infer") self._fc_out = Linear( 1024, class_dim, @@ -139,7 +139,7 @@ class GoogleNetDY(nn.Layer): 1024, weight_attr=xavier(2048, 1, "fc_o1"), bias_attr=ParamAttr(name="fc_o1_offset")) - self._drop_o1 = Dropout(p=0.7) + self._drop_o1 = Dropout(p=0.7, mode="downscale_in_infer") self._out1 = Linear( 1024, class_dim, @@ -152,7 +152,7 @@ class GoogleNetDY(nn.Layer): 1024, weight_attr=xavier(2048, 1, "fc_o2"), bias_attr=ParamAttr(name="fc_o2_offset")) - self._drop_o2 = Dropout(p=0.7) + self._drop_o2 = Dropout(p=0.7, mode="downscale_in_infer") self._out2 = Linear( 1024, class_dim, diff --git a/ppcls/modeling/architectures/inception_v4.py b/ppcls/modeling/architectures/inception_v4.py index 729256d1..5d23d365 100644 --- a/ppcls/modeling/architectures/inception_v4.py +++ b/ppcls/modeling/architectures/inception_v4.py @@ -412,7 +412,7 @@ class InceptionV4DY(nn.Layer): self._inceptionC_3 = InceptionC(name="3") self.avg_pool = AdaptiveAvgPool2d(1) - self._drop = Dropout(p=0.2) + self._drop = Dropout(p=0.2, mode="downscale_in_infer") stdv = 1.0 / math.sqrt(1536 * 1.0) self.out = Linear( 1536, diff --git a/ppcls/modeling/architectures/mobilenet_v3.py b/ppcls/modeling/architectures/mobilenet_v3.py index 95a023c9..60f29bf4 100644 --- a/ppcls/modeling/architectures/mobilenet_v3.py +++ b/ppcls/modeling/architectures/mobilenet_v3.py @@ -46,7 +46,11 @@ def make_divisible(v, divisor=8, min_value=None): class MobileNetV3(nn.Layer): - def __init__(self, scale=1.0, model_name="small", class_dim=1000): + def __init__(self, + scale=1.0, + model_name="small", + dropout_prob=0.2, + class_dim=1000): super(MobileNetV3, self).__init__() inplanes = 16 @@ -144,24 +148,27 @@ class MobileNetV3(nn.Layer): weight_attr=ParamAttr(name="last_1x1_conv_weights"), bias_attr=False) + self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer") + self.out = Linear( self.cls_ch_expand, class_dim, weight_attr=ParamAttr("fc_weights"), bias_attr=ParamAttr(name="fc_offset")) - def forward(self, inputs, label=None, dropout_prob=0.2): + def forward(self, inputs, label=None): x = self.conv1(inputs) for block in self.block_list: x = block(x) + x = self.last_second_conv(x) x = self.pool(x) + x = self.last_conv(x) x = F.hard_swish(x) - x = F.dropout(x=x, p=dropout_prob) + x = self.dropout(x) x = paddle.reshape(x, shape=[x.shape[0], x.shape[1]]) x = self.out(x) - return x diff --git a/ppcls/modeling/architectures/squeezenet.py b/ppcls/modeling/architectures/squeezenet.py index 406dbd3b..1e38d841 100644 --- a/ppcls/modeling/architectures/squeezenet.py +++ b/ppcls/modeling/architectures/squeezenet.py @@ -101,7 +101,7 @@ class SqueezeNet(nn.Layer): self._conv7 = MakeFire(384, 64, 256, 256, name="fire8") self._conv8 = MakeFire(512, 64, 256, 256, name="fire9") - self._drop = Dropout(p=0.5) + self._drop = Dropout(p=0.5, mode="downscale_in_infer") self._conv9 = Conv2d( 512, class_dim, diff --git a/ppcls/modeling/architectures/vgg.py b/ppcls/modeling/architectures/vgg.py index 1c5c02a1..dec7c3d7 100644 --- a/ppcls/modeling/architectures/vgg.py +++ b/ppcls/modeling/architectures/vgg.py @@ -89,7 +89,7 @@ class VGGNet(nn.Layer): self._conv_block_4 = ConvBlock(256, 512, self.groups[3], name="conv4_") self._conv_block_5 = ConvBlock(512, 512, self.groups[4], name="conv5_") - self._drop = Dropout(p=0.5) + self._drop = Dropout(p=0.5, mode="downscale_in_infer") self._fc1 = Linear( 7 * 7 * 512, 4096, diff --git a/ppcls/modeling/architectures/xception_deeplab.py b/ppcls/modeling/architectures/xception_deeplab.py index 24415975..2e77ed54 100644 --- a/ppcls/modeling/architectures/xception_deeplab.py +++ b/ppcls/modeling/architectures/xception_deeplab.py @@ -346,7 +346,7 @@ class XceptionDeeplab(nn.Layer): self.stride = s - self._drop = Dropout(p=0.5) + self._drop = Dropout(p=0.5, mode="downscale_in_infer") self._pool = AdaptiveAvgPool2d(1) self._fc = Linear( self.chns[1][-1], diff --git a/tools/eval.py b/tools/eval.py index d6bd82c0..06349ca9 100644 --- a/tools/eval.py +++ b/tools/eval.py @@ -12,13 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from paddle.distributed import ParallelEnv import paddle -from ppcls.utils import logger -from ppcls.utils.save_load import init_model -from ppcls.utils.config import get_config -from ppcls.data import Reader -import program +from paddle.distributed import ParallelEnv + import argparse import os import sys @@ -26,6 +22,12 @@ __dir__ = os.path.dirname(os.path.abspath(__file__)) sys.path.append(__dir__) sys.path.append(os.path.abspath(os.path.join(__dir__, '..'))) +from ppcls.utils import logger +from ppcls.utils.save_load import init_model +from ppcls.utils.config import get_config +from ppcls.data import Reader +import program + def parse_args(): parser = argparse.ArgumentParser("PaddleClas eval script") -- GitLab