Commit 630cb60a authored by littletomatodonkey

fix dropout

Parent 5ce653d6
@@ -83,7 +83,7 @@ class AlexNetDY(nn.Layer):
             256, 256, 3, 1, 1, stdv, act="relu", name="conv5")
 
         stdv = 1.0 / math.sqrt(256 * 6 * 6)
-        self._drop1 = Dropout(p=0.5)
+        self._drop1 = Dropout(p=0.5, mode="downscale_in_infer")
         self._fc6 = Linear(
             in_features=256 * 6 * 6,
             out_features=4096,
@@ -92,7 +92,7 @@ class AlexNetDY(nn.Layer):
             bias_attr=ParamAttr(
                 name="fc6_offset", initializer=Uniform(-stdv, stdv)))
 
-        self._drop2 = Dropout(p=0.5)
+        self._drop2 = Dropout(p=0.5, mode="downscale_in_infer")
         self._fc7 = Linear(
             in_features=4096,
             out_features=4096,
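Note on the change: paddle.nn.Dropout defaults to mode="upscale_in_train", which scales the kept activations by 1/(1-p) during training and is an identity at inference. mode="downscale_in_infer" instead leaves training activations unscaled and multiplies the output by (1-p) at inference, matching the behavior the original static-graph weights were trained with, which is presumably why every Dropout in this commit pins the mode explicitly. A minimal sketch (not part of the commit, assuming the Paddle 2.x dygraph API) contrasting the two modes:

import paddle
from paddle import nn

x = paddle.ones([1, 4])

# The mode pinned by this commit: inference output is scaled by (1 - p).
drop_fixed = nn.Dropout(p=0.5, mode="downscale_in_infer")
drop_fixed.eval()
print(drop_fixed(x))       # [[0.5, 0.5, 0.5, 0.5]]

# The default mode: dropout is an identity at inference time.
drop_default = nn.Dropout(p=0.5)  # mode="upscale_in_train"
drop_default.eval()
print(drop_default(x))     # [[1., 1., 1., 1.]]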
@@ -89,7 +89,7 @@ class DenseLayer(nn.Layer):
             name=name + "_x2")
 
         if dropout:
-            self.dropout_func = Dropout(p=dropout)
+            self.dropout_func = Dropout(p=dropout, mode="downscale_in_infer")
 
     def forward(self, input):
         conv = self.bn_ac_func1(input)
@@ -126,7 +126,7 @@ class GoogleNetDY(nn.Layer):
         self._pool_5 = AvgPool2d(kernel_size=7, stride=7)
 
-        self._drop = Dropout(p=0.4)
+        self._drop = Dropout(p=0.4, mode="downscale_in_infer")
         self._fc_out = Linear(
             1024,
             class_dim,
@@ -139,7 +139,7 @@ class GoogleNetDY(nn.Layer):
             1024,
             weight_attr=xavier(2048, 1, "fc_o1"),
             bias_attr=ParamAttr(name="fc_o1_offset"))
-        self._drop_o1 = Dropout(p=0.7)
+        self._drop_o1 = Dropout(p=0.7, mode="downscale_in_infer")
         self._out1 = Linear(
             1024,
             class_dim,
@@ -152,7 +152,7 @@ class GoogleNetDY(nn.Layer):
             1024,
             weight_attr=xavier(2048, 1, "fc_o2"),
             bias_attr=ParamAttr(name="fc_o2_offset"))
-        self._drop_o2 = Dropout(p=0.7)
+        self._drop_o2 = Dropout(p=0.7, mode="downscale_in_infer")
         self._out2 = Linear(
             1024,
             class_dim,
@@ -412,7 +412,7 @@ class InceptionV4DY(nn.Layer):
         self._inceptionC_3 = InceptionC(name="3")
 
         self.avg_pool = AdaptiveAvgPool2d(1)
-        self._drop = Dropout(p=0.2)
+        self._drop = Dropout(p=0.2, mode="downscale_in_infer")
         stdv = 1.0 / math.sqrt(1536 * 1.0)
         self.out = Linear(
             1536,
@@ -46,7 +46,11 @@ def make_divisible(v, divisor=8, min_value=None):
 
 class MobileNetV3(nn.Layer):
-    def __init__(self, scale=1.0, model_name="small", class_dim=1000):
+    def __init__(self,
+                 scale=1.0,
+                 model_name="small",
+                 dropout_prob=0.2,
+                 class_dim=1000):
         super(MobileNetV3, self).__init__()
 
         inplanes = 16
@@ -144,24 +148,27 @@ class MobileNetV3(nn.Layer):
             weight_attr=ParamAttr(name="last_1x1_conv_weights"),
             bias_attr=False)
 
+        self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer")
+
         self.out = Linear(
             self.cls_ch_expand,
             class_dim,
             weight_attr=ParamAttr("fc_weights"),
             bias_attr=ParamAttr(name="fc_offset"))
 
-    def forward(self, inputs, label=None, dropout_prob=0.2):
+    def forward(self, inputs, label=None):
         x = self.conv1(inputs)
         for block in self.block_list:
             x = block(x)
         x = self.last_second_conv(x)
         x = self.pool(x)
         x = self.last_conv(x)
         x = F.hard_swish(x)
-        x = F.dropout(x=x, p=dropout_prob)
+        x = self.dropout(x)
         x = paddle.reshape(x, shape=[x.shape[0], x.shape[1]])
         x = self.out(x)
         return x
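The MobileNetV3 hunks above also move dropout out of forward(): the probability becomes a constructor argument (dropout_prob) and the functional F.dropout call is replaced by a Dropout sublayer, so train/eval switching is handled by the layer state rather than by the caller. A hypothetical condensed head showing the same pattern (ClsHead and the 1280-channel width are illustrative, not from the commit):

import paddle
from paddle import nn

class ClsHead(nn.Layer):
    def __init__(self, in_features=1280, class_dim=1000, dropout_prob=0.2):
        super(ClsHead, self).__init__()
        # Configured once in __init__; behavior then follows self.training.
        self.dropout = nn.Dropout(p=dropout_prob, mode="downscale_in_infer")
        self.out = nn.Linear(in_features, class_dim)

    def forward(self, x):
        x = self.dropout(x)  # no dropout_prob threaded through forward()
        return self.out(x)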
@@ -101,7 +101,7 @@ class SqueezeNet(nn.Layer):
         self._conv7 = MakeFire(384, 64, 256, 256, name="fire8")
         self._conv8 = MakeFire(512, 64, 256, 256, name="fire9")
 
-        self._drop = Dropout(p=0.5)
+        self._drop = Dropout(p=0.5, mode="downscale_in_infer")
         self._conv9 = Conv2d(
             512,
             class_dim,
@@ -89,7 +89,7 @@ class VGGNet(nn.Layer):
         self._conv_block_4 = ConvBlock(256, 512, self.groups[3], name="conv4_")
         self._conv_block_5 = ConvBlock(512, 512, self.groups[4], name="conv5_")
 
-        self._drop = Dropout(p=0.5)
+        self._drop = Dropout(p=0.5, mode="downscale_in_infer")
         self._fc1 = Linear(
             7 * 7 * 512,
             4096,
@@ -346,7 +346,7 @@ class XceptionDeeplab(nn.Layer):
         self.stride = s
 
-        self._drop = Dropout(p=0.5)
+        self._drop = Dropout(p=0.5, mode="downscale_in_infer")
         self._pool = AdaptiveAvgPool2d(1)
         self._fc = Linear(
             self.chns[1][-1],
@@ -12,13 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from paddle.distributed import ParallelEnv
 import paddle
-from ppcls.utils import logger
-from ppcls.utils.save_load import init_model
-from ppcls.utils.config import get_config
-from ppcls.data import Reader
-import program
+from paddle.distributed import ParallelEnv
 import argparse
 import os
 import sys
@@ -26,6 +22,12 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
 sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
 
+from ppcls.utils import logger
+from ppcls.utils.save_load import init_model
+from ppcls.utils.config import get_config
+from ppcls.data import Reader
+import program
+
 
 def parse_args():
     parser = argparse.ArgumentParser("PaddleClas eval script")
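The eval.py reshuffle groups imports by resolvability: standard-library and paddle imports stay at the top, while the in-repo modules (ppcls, program) move below the sys.path.append calls that make the repository root importable. A minimal sketch of the pattern (only the names appearing in the diff are real):

import os
import sys

import paddle  # resolves from site-packages, safe to import first

# Make the repository root importable before any local package import.
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))

from ppcls.utils import logger  # noqa: E402  (local import, after path setup)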