Elementwise_mul forward-pass shape mismatch
Created by: YingjingLu
- Environment: 1) PaddlePaddle version: 1.5, CUDA 9, cuDNN 7.3.1 2) CPU: i7 5820k 3) GPU: Titan XP + GTX 1080 Ti 4) OS: Ubuntu 18.04, Python 3.6
- Training info: 1) single machine / single GPU
- Reproduction info: the following code reproduces the error directly (just run it):

```python
import numpy as np
import paddle.fluid as fluid
import math
from paddle.fluid.layers import relu, prelu, leaky_relu
s = 64
m1 = 1.35
m2 = 0.5
m3 = 0.35
place = fluid.CUDAPlace(0)
sess = fluid.Executor( place )
class BatchNormConv2D( object ):
    # conv2d followed by batch_norm, with an optional activation
    def __init__( self, filters = 8,
                  kernel_w = 3, kernel_h = 3,
                  stride_h = 1, stride_v = 1,
                  pad_h = 0, pad_v = 0,
                  dilation_h = 1, dilation_v = 1,
                  activation = None,
                  use_bias = True,
                  kernel_init = fluid.initializer.MSRAInitializer(), bias_init = fluid.initializer.ConstantInitializer(),
                  groups = 1,
                  kernel_regu = None, bias_regu = None,
                  activation_regu = None,
                  bias_constraint = None,
                  reuse = False,
                  trainable = True,
                  name = "BNConv2D" ):
        self.filters = filters
        self.kernel_w = kernel_w
        self.kernel_h = kernel_h
        self.stride_h = stride_h
        self.stride_v = stride_v
        self.pad_h = pad_h
        self.pad_v = pad_v
        self.dilation_v = dilation_v
        self.dilation_h = dilation_h
        self.activation = activation
        self.use_bias = use_bias
        self.kernel_init = kernel_init
        self.bias_init = bias_init
        self.groups = groups
        self.kernel_regu = kernel_regu
        self.bias_regu = bias_regu
        self.activation_regu = activation_regu
        self.bias_constraint = bias_constraint
        self.reuse = reuse
        self.trainable = trainable
        self.name = name

    def __call__( self, inputs, transform_input = False ):
        self.w = self.name + "_w"
        self.b = self.name + "_b"
        w_attr = fluid.ParamAttr( name = self.w,
                                  initializer = self.kernel_init,
                                  regularizer = self.kernel_regu,
                                  trainable = self.trainable )
        b_attr = fluid.ParamAttr( name = self.b,
                                  initializer = self.bias_init,
                                  regularizer = self.bias_regu,
                                  trainable = self.trainable )
        if transform_input:
            inputs = fluid.layers.transpose( inputs, perm = [ 0, 3, 1, 2 ] )
        print( self.name, "input shape:", inputs.shape )
        """
        [ n, c, h, w ] = inputs.shape
        # calc padding for each side
        filter_w = self.dilation_h * ( self.kernel_w - 1 ) + 1
        filter_h = self.dilation_v * ( self.kernel_h - 1 ) + 1
        if self.padding.lower() == 'valid':
            width = math.ceil( ( w - filter_w ) / self.stride_h ) + 1
            height = math.ceil( ( h - filter_h ) / self.stride_v ) + 1
        elif self.padding.lower() == "same":
            width = math.ceil( w / self.stride_h )
            height = math.ceil( h / self.stride_v )
        else:
            NotImplementedError( "Not an implemented padding for Conv2D: %s", self.padding )
        print( "width", width, "height", height, [ n, c, h, w ] )
        restructured_w = ( width - 1 ) * self.stride_h + filter_w
        restructured_h = ( height - 1 ) * self.stride_v + filter_h
        pad_h = max( math.ceil( ( restructured_w - w ) / 2 ), 0 )
        pad_v = max( math.ceil( ( restructured_h - h ) / 2 ), 0 )
        self.pad_h, self.pad_v = pad_h, pad_v
        print( "pad_v", pad_v, "pad_h", pad_h )
        """
        self.layer_out = fluid.layers.conv2d( input = inputs,
                                              num_filters = self.filters,
                                              filter_size = ( self.kernel_h, self.kernel_w ),
                                              stride = ( self.stride_v, self.stride_h ),
                                              padding = ( self.pad_v, self.pad_h ),
                                              dilation = ( self.dilation_v, self.dilation_h ),
                                              act = None,
                                              groups = self.groups,
                                              param_attr = w_attr,
                                              bias_attr = b_attr,
                                              name = self.name )
        self.layer_out = fluid.layers.batch_norm( self.layer_out, act = None, use_global_stats = True )
        if self.activation is not None:
            self.layer_out = self.activation( self.layer_out )
        print( self.name, "output shape", self.layer_out.shape )
        print( "--------------------------" )
        return self.layer_out

    def __repr__( self ):
        return self.name

    def get_weights( self, sess = None ):
        assert ( self.w is not None ) and ( self.b is not None ), "weights in the conv layer should be initialized"
        # place = fluid.CPUPlace()
        # exe = fluid.Executor(place)
        # exe.run(fluid.default_startup_program())
        w = fluid.global_scope().find_var( self.w ).get_tensor()
        b = fluid.global_scope().find_var( self.b ).get_tensor()
        return np.array( w ), np.array( b )

class Dense( object ):
    def __init__( self, units,
                  activation = None, use_bias = True,
                  kernel_init = fluid.initializer.MSRAInitializer(), bias_init = fluid.initializer.ConstantInitializer(),
                  kernel_regu = None, bias_regu = None,
                  activation_regu = None,
                  bias_constraint = None,
                  reuse = False,
                  trainable = True,
                  name = "Dense" ):
        self.units = units
        self.activation = activation
        self.use_bias = use_bias
        self.kernel_init = kernel_init
        self.bias_init = bias_init
        self.kernel_regu = kernel_regu
        self.bias_regu = bias_regu
        self.activation_regu = activation_regu
        self.bias_constraint = bias_constraint
        self.reuse = reuse
        self.trainable = trainable
        self.name = name

    def __call__( self, inputs ):
        self.w = self.name + "_w"
        self.b = self.name + "_b"
        w_attr = fluid.ParamAttr( name = self.w,
                                  initializer = self.kernel_init,
                                  regularizer = self.kernel_regu,
                                  trainable = self.trainable )
        b_attr = fluid.ParamAttr( name = self.b,
                                  initializer = self.bias_init,
                                  regularizer = self.bias_regu,
                                  trainable = self.trainable )
        self.layer_out = fluid.layers.fc( inputs,
                                          self.units,
                                          num_flatten_dims = 1,
                                          act = None,
                                          param_attr = w_attr,
                                          bias_attr = b_attr,
                                          name = self.name )
        if self.activation is not None:
            self.layer_out = self.activation( self.layer_out )
        return self.layer_out

    def __repr__( self ):
        return self.name

    def get_weights( self, sess = None ):
        assert ( self.w is not None ) and ( self.b is not None ), "weights in the dense layer should be initialized"
        # place = fluid.CPUPlace()
        # exe = fluid.Executor(place)
        # exe.run(fluid.default_startup_program())
        w = fluid.global_scope().find_var( self.w ).get_tensor()
        b = fluid.global_scope().find_var( self.b ).get_tensor()
        return np.array( w ), np.array( b )

class ArcLinear( object ):
    def __init__( self, in_size, out_size, m = 4, phiflag = True ):
        self.in_size = in_size
        self.out_size = out_size
        matrix = np.random.uniform( -1.0, 1.0, [ in_size, out_size ] )
        norm = np.linalg.norm( matrix, 2, 1, True )
        matrix = matrix / norm
        param_attr = fluid.ParamAttr( trainable = False, name = "eye",
                                      initializer = fluid.initializer.NumpyArrayInitializer( matrix ) )
        self.weight = fluid.layers.create_parameter( [ in_size, out_size ],
                                                     "float32",
                                                     name = "weight",
                                                     attr = param_attr,
                                                     is_bias = False )

    def __call__( self, inputs ):
        x = inputs
        ww = fluid.layers.l2_normalize( self.weight, axis = 1, name = "weight norm" )
        cos_theta = fluid.layers.mul( x, ww )
        return cos_theta * s

class ArcLoss( object ):
    # applies the angular margins (m1, m2, m3) to the target-class logits before softmax
    def __init__( self, gamma = 0., class_num = 10575 ):
        self.class_size = class_num

    def __call__( self, inputs, target ):
        cos_theta = inputs
        cos_theta = cos_theta / s
        truth_cos_theta = fluid.layers.gather( cos_theta, target )
        truth_theta = fluid.layers.acos( truth_cos_theta )
        truth_theta = m1 * truth_theta + m2
        truth_theta = fluid.layers.cos( truth_theta )
        truth_theta -= m3
        diff = truth_theta - truth_cos_theta
        diff = fluid.layers.reshape( diff, [ -1, 1 ], inplace = True )
        diff = fluid.layers.expand( diff, [ 1, self.class_size ] )
        index = fluid.layers.one_hot( target, self.class_size )
        index = fluid.layers.cast( index, "float32" )
        diff = fluid.layers.cast( diff, "float32" )
        # index = fluid.layers.reshape( index, [ -1, self.class_size ], inplace = True )
        print( "inputs shape", inputs.shape )
        print( "index shape", index.shape )
        print( "diff shape", diff.shape )
        print( "cos_theta shape", cos_theta.shape )
        # this is the op that raises the shape-mismatch error in the traceback below
        index = fluid.layers.elementwise_mul( index, diff, axis = 0 )
        cos_theta += index
        cos_theta *= s
        return cos_theta

sample_pos = fluid.layers.data( name = "sample_pos", shape = [ 125, 125, 3 ],
                                dtype = "float32", append_batch_size = True,
                                stop_gradient = False )
label_pos = fluid.layers.data( name = "label_pos", shape = [ 1 ], dtype = "int32", append_batch_size = True )
lr = fluid.layers.data( name = "lr", shape = [ 1 ], dtype = "float32", append_batch_size = False )

conv_1_1_obj = BatchNormConv2D( 64,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 2, stride_v = 2,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_1_1" )
conv_1_2_obj = BatchNormConv2D( 64,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_1_2" )
conv_1_3_obj = BatchNormConv2D( 64,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_1_3" )
conv_2_1_obj = BatchNormConv2D( 128,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 2, stride_v = 2,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_2_1" )
conv_2_2_obj = BatchNormConv2D( 128,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_2_2" )
conv_2_3_obj = BatchNormConv2D( 128,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_2_3" )
conv_2_4_obj = BatchNormConv2D( 128,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_2_4" )
conv_2_5_obj = BatchNormConv2D( 128,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_2_5" )
conv_3_1_obj = BatchNormConv2D( 256,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 2, stride_v = 2,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_3_1" )
conv_3_2_obj = BatchNormConv2D( 256,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_3_2" )
conv_3_3_obj = BatchNormConv2D( 256,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_3_3" )
conv_3_4_obj = BatchNormConv2D( 256,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_3_4" )
conv_3_5_obj = BatchNormConv2D( 256,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_3_5" )
conv_3_6_obj = BatchNormConv2D( 256,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_3_6" )
conv_3_7_obj = BatchNormConv2D( 256,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_3_7" )
conv_3_8_obj = BatchNormConv2D( 256,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_3_8" )
conv_3_9_obj = BatchNormConv2D( 256,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_3_9" )
conv_4_1_obj = BatchNormConv2D( 512,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 2, stride_v = 2,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_4_1" )
conv_4_2_obj = BatchNormConv2D( 512,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_4_2" )
conv_4_3_obj = BatchNormConv2D( 512,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_4_3" )
conv_4_4_obj = BatchNormConv2D( 512,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 2, stride_v = 2,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_4_4" )
pre_embed_obj = Dense( 512, activation = leaky_relu, name = "pre_embed" )
embed_obj = ArcLinear( 512, 10575 )
loss_obj = ArcLoss( class_num = 10575 )
out = conv_1_1_obj( sample_pos, True )
out = out + conv_1_3_obj( conv_1_2_obj( out ) )
out = conv_2_1_obj( out )
out = out + conv_2_3_obj( conv_2_2_obj( out ) )
out = out + conv_2_5_obj( conv_2_4_obj( out ) )
out = conv_3_1_obj( out )
out = out + conv_3_3_obj( conv_3_2_obj( out ) )
out = out + conv_3_5_obj( conv_3_4_obj( out ) )
out = out + conv_3_7_obj( conv_3_6_obj( out ) )
out = out + conv_3_9_obj( conv_3_8_obj( out ) )
out = conv_4_1_obj( out )
out_tmp = conv_4_3_obj( conv_4_2_obj( out ) )
print( "HAHAHA", out.shape, out_tmp.shape )
out = conv_4_4_obj( out + out_tmp )
embed = pre_embed_obj( out )
logit = embed_obj( embed )
logit = loss_obj( logit, label_pos )
loss = fluid.layers.softmax_with_cross_entropy( logit, fluid.layers.cast( label_pos, "int64" ), soft_label = False, axis = 1)
loss = fluid.layers.mean( loss )
startup_program = fluid.default_startup_program()
main_program = fluid.default_main_program()
test_program = main_program.clone( for_test = True )
optim = fluid.optimizer.AdamOptimizer( lr, beta1 = 0.9, beta2 = 0.99 )
_, grad_list = optim.minimize( loss )
sess.run( fluid.default_startup_program() )
bs = np.random.normal( size = (8, 125, 125, 3) ).astype( np.float32 )
bl = np.random.randint( 10575, size = ( 8, 1 ) ).astype( np.int32 )
learningrate = 0.001
print( bs.shape, bl.shape )
[ loss ] = sess.run( program = main_program,
                     feed = { sample_pos.name : bs,
                              label_pos.name : bl,
                              lr.name : learningrate },
                     fetch_list = [ loss.name ] )
print( loss )
```
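All four shapes printed inside `ArcLoss.__call__` come out as (-1, 10575) at graph-build time, so the mismatch only shows up once the program actually runs. As a debugging aid (a sketch only: the message strings are mine, and it assumes `fluid.layers.Print` behaves as documented in 1.5), the two operands could be dumped at run time right before the failing op inside `ArcLoss.__call__`:

```python
# Hypothetical debugging aid, not part of the original repro:
# print the runtime shape of both operands just before elementwise_mul.
index = fluid.layers.Print( index, message = "index (runtime)", summarize = 10 )
diff = fluid.layers.Print( diff, message = "diff (runtime)", summarize = 10 )
index = fluid.layers.elementwise_mul( index, diff, axis = 0 )
```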
Error output:

```
inputs shape (-1, 10575)
index shape (-1, 10575)
diff shape (-1, 10575)
cos_theta shape (-1, 10575)
W0712 14:01:24.402220 14020 device_context.cc:259] Please NOTE: device: 0, CUDA Capability: 61, Driver API Version: 9.1, Runtime API Version: 9.0
W0712 14:01:24.404572 14020 device_context.cc:267] device: 0, cuDNN Version: 7.3.
(8, 125, 125, 3) (8, 1)
Traceback (most recent call last):
File "debug.py", line 426, in <module>
fetch_list = [ loss.name ] )
File "/home/steven/.local/lib/python3.6/site-packages/paddle/fluid/executor.py", line 650, in run
use_program_cache=use_program_cache)
File "/home/steven/.local/lib/python3.6/site-packages/paddle/fluid/executor.py", line 748, in _run
exe.run(program.desc, scope, 0, True, True, fetch_var_name)
paddle.fluid.core_avx.EnforceNotMet: Invoke operator elementwise_mul error.
Python Callstacks:
File "/home/steven/.local/lib/python3.6/site-packages/paddle/fluid/framework.py", line 1748, in append_op
attrs=kwargs.get("attrs", None))
File "/home/steven/.local/lib/python3.6/site-packages/paddle/fluid/layer_helper.py", line 43, in append_op
return self.main_program.current_block().append_op(*args, **kwargs)
File "/home/steven/.local/lib/python3.6/site-packages/paddle/fluid/layers/nn.py", line 9831, in _elementwise_op
'use_mkldnn': use_mkldnn})
File "/home/steven/.local/lib/python3.6/site-packages/paddle/fluid/layers/nn.py", line 9892, in elementwise_mul
return _elementwise_op(LayerHelper('elementwise_mul', **locals()))
File "debug.py", line 234, in __call__
index = fluid.layers.elementwise_mul( index, diff, axis = 0 )
File "debug.py", line 404, in <module>
logit = loss_obj( logit, label_pos )
C++ Callstacks:
Enforce failed. Expected x_dims[i + axis] == y_dims[i], but received x_dims[i + axis]:8 != y_dims[i]:84600.
Broadcast dimension mismatch. at [/paddle/paddle/fluid/operators/elementwise/elementwise_op_function.h:63]
PaddlePaddle Call Stacks:
0 0x7fb3c7401818p void paddle::platform::EnforceNotMet::Init<std::string>(std::string, char const*, int) + 360
1 0x7fb3c7401b67p paddle::platform::EnforceNotMet::EnforceNotMet(std::string const&, char const*, int) + 87
2 0x7fb3c7f3995cp paddle::operators::get_mid_dims(paddle::framework::DDim const&, paddle::framework::DDim const&, int, int*, int*, int*) + 364
3 0x7fb3c8bb4fa5p void paddle::operators::ElementwiseComputeEx<paddle::operators::MulFunctor<float>, paddle::platform::CUDADeviceContext, float, float>(paddle::framework::ExecutionContext const&, paddle::framework::Tensor const*, paddle::framework::Tensor const*, int, paddle::operators::MulFunctor<float>, paddle::framework::Tensor*) + 421
4 0x7fb3c8bb5793p void paddle::operators::default_elementwise_mul<paddle::platform::CUDADeviceContext, float>(paddle::framework::ExecutionContext const&, paddle::framework::Tensor const*, paddle::framework::Tensor const*, paddle::framework::Tensor*) + 115
5 0x7fb3c8bb5afbp paddle::operators::ElementwiseMulKernel<paddle::platform::CUDADeviceContext, float>::Compute(paddle::framework::ExecutionContext const&) const + 811
6 0x7fb3c8bb5f83p std::_Function_handler<void (paddle::framework::ExecutionContext const&), paddle::framework::OpKernelRegistrarFunctor<paddle::platform::CUDAPlace, false, 0ul, paddle::operators::ElementwiseMulKernel<paddle::platform::CUDADeviceContext, float>, paddle::operators::ElementwiseMulKernel<paddle::platform::CUDADeviceContext, double>, paddle::operators::ElementwiseMulKernel<paddle::platform::CUDADeviceContext, int>, paddle::operators::ElementwiseMulKernel<paddle::platform::CUDADeviceContext, long>, paddle::operators::ElementwiseMulKernel<paddle::platform::CUDADeviceContext, paddle::platform::float16> >::operator()(char const*, char const*, int) const::{lambda(paddle::framework::ExecutionContext const&)#1}>::_M_invoke(std::_Any_data const&, paddle::framework::ExecutionContext const&) + 35
7 0x7fb3c935c6e7p paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&, paddle::framework::RuntimeContext*) const + 375
8 0x7fb3c935cac1p paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) const + 529
9 0x7fb3c935a0bcp paddle::framework::OperatorBase::Run(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) + 332
10 0x7fb3c758b48ep paddle::framework::Executor::RunPreparedContext(paddle::framework::ExecutorPrepareContext*, paddle::framework::Scope*, bool, bool, bool) + 382
11 0x7fb3c758e52fp paddle::framework::Executor::Run(paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool, std::vector<std::string, std::allocator<std::string> > const&, bool) + 143
12 0x7fb3c73f297dp
13 0x7fb3c7433cb6p
14 0x56204cp _PyCFunction_FastCallDict + 860
15 0x4f88bap
16 0x4f98c7p _PyEval_EvalFrameDefault + 1127
17 0x4f6128p
18 0x4f7d60p
19 0x4f876dp
20 0x4fa6c0p _PyEval_EvalFrameDefault + 4704
21 0x4f6128p
22 0x4f7d60p
23 0x4f876dp
24 0x4fa6c0p _PyEval_EvalFrameDefault + 4704
25 0x4f6128p
26 0x4f9023p PyEval_EvalCode + 35
27 0x6415b2p
28 0x64166ap PyRun_FileExFlags + 154
29 0x643730p PyRun_SimpleFileExFlags + 400
30 0x62b26ep Py_Main + 1438
31 0x4b4cb0p main + 224
32 0x7fb414df7b97p __libc_start_main + 231
33 0x5bdf6ap _start + 42
```
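For reference, the two numbers in the C++ message line up with the constants in the script: the first operand (`index`) has 8 rows (the batch size), while the second (`diff`) has 84600 = 8 × 10575 rows (batch size × class_num). That is consistent with `fluid.layers.gather` returning whole rows of `cos_theta` (8 × 10575 values) rather than one value per sample, so that `reshape( diff, [ -1, 1 ] )` followed by `expand` produces 84600 rows. A minimal sketch that triggers the same broadcast check in isolation (illustration only, with made-up CPU feeds; it is not the original model):

```python
import numpy as np
import paddle.fluid as fluid

# Two 2-D inputs whose leading dimensions disagree the same way as in the report.
x = fluid.layers.data( name = "x", shape = [ 10575 ], dtype = "float32" )  # fed as (8, 10575)
y = fluid.layers.data( name = "y", shape = [ 10575 ], dtype = "float32" )  # fed as (84600, 10575)
out = fluid.layers.elementwise_mul( x, y, axis = 0 )  # needs x.shape[0] == y.shape[0] at run time

exe = fluid.Executor( fluid.CPUPlace() )
exe.run( fluid.default_startup_program() )
exe.run( feed = { "x": np.ones( ( 8, 10575 ), np.float32 ),
                  "y": np.ones( ( 84600, 10575 ), np.float32 ) },
         fetch_list = [ out ] )
# -> Enforce failed. Expected x_dims[i + axis] == y_dims[i] ... 8 != 84600
```

If the intent was to keep only the target-class cosine for each sample as a (batch, 1) tensor, keeping that leading dimension at the batch size (for example by reducing `cos_theta` against the one-hot mask instead of gathering whole rows) should make both operands of `elementwise_mul` agree, but I have not verified that against the full training script.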