diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 09c92d3513e86a7657880c01736f5f41f53cfcf6..e88e962cff5bbfcb8be1014dbaab85568d2625ff 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2420,6 +2420,7 @@ class BatchNormLayer(LayerBase):
         # If not use is_static, even set learning_rate = 0, decay_rate = 0,
         # these paras will change if set average_window in configure.
         use_gpu = bool(int(g_command_config_args.get("use_gpu", 0)))
+        use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0)))
         is_shared = True if not use_gpu else False
         for i in xrange(2):
             inputs.append(
@@ -2433,11 +2434,17 @@ class BatchNormLayer(LayerBase):
 
         parallel_nn = bool(int(g_command_config_args.get("parallel_nn", 0)))
         cudnn_version = int(g_command_config_args.get("cudnn_version", 0))
-        # Automatically select cudnn_batch_norm for GPU and batch_norm for CPU.
-        # Also based on cudnn version.
+        # Automatically select cudnn_batch_norm for GPU, batch_norm for CPU
+        # and mkldnn_batch_norm for MKLDNN. Also based on cudnn version.
+        if batch_norm_type == "mkldnn_batch_norm":
+            config_assert(use_mkldnn, "mkldnn_batch_norm requires use_mkldnn to be enabled")
         use_cudnn = use_gpu and batch_norm_type != "batch_norm" and \
+            not use_mkldnn and batch_norm_type != "mkldnn_batch_norm" and \
             ((not parallel_nn) or self.config.device > -1)
-        self.layer_type = "cudnn_batch_norm" if use_cudnn else "batch_norm"
+        if use_cudnn:
+            self.layer_type = "cudnn_batch_norm"
+        else:
+            self.layer_type = "mkldnn_batch_norm" if use_mkldnn else "batch_norm"
         super(BatchNormLayer, self).__init__(
             name, self.layer_type, 0, inputs=inputs, **xargs)
 
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 09315b9d9224076d91c16a6c0b949d4ab289bf70..cc1b34df9e7cf8d17bafeb57624548de017066e9 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -3014,16 +3014,19 @@ def batch_norm_layer(input,
     :param input: batch normalization input. Better be linear activation.
                 Because there is an activation inside batch_normalization.
     :type input: LayerOutput
-    :param batch_norm_type: We have batch_norm and cudnn_batch_norm. batch_norm
-                            supports both CPU and GPU. cudnn_batch_norm requires
-                            cuDNN version greater or equal to v4 (>=v4). But
-                            cudnn_batch_norm is faster and needs less memory
-                            than batch_norm. By default (None), we will
-                            automaticly select cudnn_batch_norm for GPU and
-                            batch_norm for CPU. Otherwise, select batch norm
-                            type based on the specified type. If you use cudnn_batch_norm,
+    :param batch_norm_type: We have batch_norm, mkldnn_batch_norm and cudnn_batch_norm.
+                            batch_norm supports CPU, MKLDNN and GPU. cudnn_batch_norm
+                            requires cuDNN version greater than or equal to v4 (>=v4).
+                            But cudnn_batch_norm is faster and needs less
+                            memory than batch_norm. mkldnn_batch_norm requires
+                            use_mkldnn to be enabled. By default (None), we will
+                            automatically select cudnn_batch_norm for GPU,
+                            mkldnn_batch_norm for MKLDNN and batch_norm for CPU.
+                            Otherwise, select batch norm type based on the
+                            specified type. If you use cudnn_batch_norm,
                             we suggested you use latest version, such as v5.1.
     :type batch_norm_type: None | string, None or "batch_norm" or "cudnn_batch_norm"
+                           or "mkldnn_batch_norm"
     :param act: Activation Type. Better be relu. Because batch normalization
                 will normalize input near zero.
     :type act: BaseActivation
@@ -3063,6 +3066,7 @@ def batch_norm_layer(input,
     else:
         num_channels = input.size
     assert (batch_norm_type is None) or (batch_norm_type == "batch_norm") or \
+           (batch_norm_type == "mkldnn_batch_norm") or \
           (batch_norm_type == "cudnn_batch_norm")
     l = Layer(
         name=name,
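
Usage sketch (not part of this patch): a minimal v1 trainer config that requests the new type explicitly. It assumes PaddlePaddle was built with MKL-DNN and that the trainer is launched with `use_mkldnn` enabled (the flag read through `g_command_config_args` above, e.g. `--use_mkldnn=1`); layer and activation helpers come from `paddle.trainer_config_helpers`.

```python
# Minimal trainer config sketch showing the new batch_norm_type.
# Assumes an MKL-DNN build and use_mkldnn enabled on the trainer command line.
from paddle.trainer_config_helpers import *

img = data_layer(name='image', size=28 * 28)
# Linear activation before batch norm, since batch_norm_layer applies its own
# activation after normalization (see the docstring above).
fc = fc_layer(input=img, size=128, act=LinearActivation())
# Request the MKLDNN implementation explicitly; leaving batch_norm_type=None
# lets config_parser.py pick mkldnn_batch_norm automatically when use_mkldnn
# is set (and cudnn_batch_norm / batch_norm otherwise).
bn = batch_norm_layer(input=fc,
                      batch_norm_type="mkldnn_batch_norm",
                      act=ReluActivation())
out = fc_layer(input=bn, size=10, act=SoftmaxActivation())
outputs(out)
```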