Elementwise_mul forward-pass shape mismatch
Created by: YingjingLu
- Environment: 1) PaddlePaddle version: 1.5, CUDA 9, cuDNN 7.3.1 2) CPU: i7 5820k 3) GPU: Titan XP + GTX 1080 Ti 4) OS: Ubuntu 18.04, Python 3.6
- Training info: 1) single machine / single GPU
- Reproduction info: the following code reproduces the error directly (just run it):

```python
import numpy as np
import paddle.fluid as fluid
import math
from paddle.fluid.layers import relu, prelu, leaky_relu
s = 64
m1 = 1.35
m2 = 0.5
m3 = 0.35
place = fluid.CUDAPlace(0)
sess = fluid.Executor( place )
class BatchNormConv2D( object ):
    # conv2d followed by batch_norm, with an optional activation
    def __init__( self, filters = 8,
                  kernel_w = 3, kernel_h = 3,
                  stride_h = 1, stride_v = 1,
                  pad_h = 0, pad_v = 0,
                  dilation_h = 1, dilation_v = 1,
                  activation = None,
                  use_bias = True,
                  kernel_init = fluid.initializer.MSRAInitializer(), bias_init = fluid.initializer.ConstantInitializer(),
                  groups = 1,
                  kernel_regu = None, bias_regu = None,
                  activation_regu = None,
                  bias_constraint = None,
                  reuse = False,
                  trainable = True,
                  name = "BNConv2D" ):
        self.filters = filters
        self.kernel_w = kernel_w
        self.kernel_h = kernel_h
        self.stride_h = stride_h
        self.stride_v = stride_v
        self.pad_h = pad_h
        self.pad_v = pad_v
        self.dilation_v = dilation_v
        self.dilation_h = dilation_h
        self.activation = activation
        self.use_bias = use_bias
        self.kernel_init = kernel_init
        self.bias_init = bias_init
        self.groups = groups
        self.kernel_regu = kernel_regu
        self.bias_regu = bias_regu
        self.activation_regu = activation_regu
        self.bias_constraint = bias_constraint
        self.reuse = reuse
        self.trainable = trainable
        self.name = name

    def __call__( self, inputs, transform_input = False ):
        self.w = self.name + "_w"
        self.b = self.name + "_b"
        w_attr = fluid.ParamAttr( name = self.w,
                                  initializer = self.kernel_init,
                                  regularizer = self.kernel_regu,
                                  trainable = self.trainable )
        b_attr = fluid.ParamAttr( name = self.b,
                                  initializer = self.bias_init,
                                  regularizer = self.bias_regu,
                                  trainable = self.trainable )
        if transform_input:
            inputs = fluid.layers.transpose( inputs, perm = [ 0, 3, 1, 2 ] )
        print( self.name, "input shape:", inputs.shape )
        """
        [ n, c, h, w ] = inputs.shape
        # calc padding for each side
        filter_w = self.dilation_h * ( self.kernel_w - 1 ) + 1
        filter_h = self.dilation_v * ( self.kernel_h - 1 ) + 1
        if self.padding.lower() == 'valid':
            width = math.ceil( ( w - filter_w ) / self.stride_h ) + 1
            height = math.ceil( ( h - filter_h ) / self.stride_v ) + 1
        elif self.padding.lower() == "same":
            width = math.ceil( w / self.stride_h )
            height = math.ceil( h / self.stride_v )
        else:
            NotImplementedError( "Not an implemented padding for Conv2D: %s", self.padding )
        print( "width", width, "height", height, [ n, c, h, w ] )
        restructured_w = ( width - 1 ) * self.stride_h + filter_w
        restructured_h = ( height - 1 ) * self.stride_v + filter_h
        pad_h = max( math.ceil( ( restructured_w - w ) / 2 ), 0 )
        pad_v = max( math.ceil( ( restructured_h - h ) / 2 ), 0 )
        self.pad_h, self.pad_v = pad_h, pad_v
        print( "pad_v", pad_v, "pad_h", pad_h )
        """
        self.layer_out = fluid.layers.conv2d( input = inputs,
                                              num_filters = self.filters,
                                              filter_size = ( self.kernel_h, self.kernel_w ),
                                              stride = ( self.stride_v, self.stride_h ),
                                              padding = ( self.pad_v, self.pad_h ),
                                              dilation = ( self.dilation_v, self.dilation_h ),
                                              act = None,
                                              groups = self.groups,
                                              param_attr = w_attr,
                                              bias_attr = b_attr,
                                              name = self.name )
        self.layer_out = fluid.layers.batch_norm( self.layer_out, act = None, use_global_stats = True )
        if self.activation is not None:
            self.layer_out = self.activation( self.layer_out )
        print( self.name, "output shape", self.layer_out.shape )
        print( "--------------------------" )
        return self.layer_out

    def __repr__( self ):
        return self.name

    def get_weights( self, sess = None ):
        assert ( self.w is not None ) and ( self.b is not None ), "weights in the conv layer should be initialized"
        # place = fluid.CPUPlace()
        # exe = fluid.Executor(place)
        # exe.run(fluid.default_startup_program())
        w = fluid.global_scope().find_var( self.w ).get_tensor()
        b = fluid.global_scope().find_var( self.b ).get_tensor()
        return np.array( w ), np.array( b )

class Dense( object ):
    def __init__( self, units,
                  activation = None, use_bias = True,
                  kernel_init = fluid.initializer.MSRAInitializer(), bias_init = fluid.initializer.ConstantInitializer(),
                  kernel_regu = None, bias_regu = None,
                  activation_regu = None,
                  bias_constraint = None,
                  reuse = False,
                  trainable = True,
                  name = "Dense" ):
        self.units = units
        self.activation = activation
        self.use_bias = use_bias
        self.kernel_init = kernel_init
        self.bias_init = bias_init
        self.kernel_regu = kernel_regu
        self.bias_regu = bias_regu
        self.activation_regu = activation_regu
        self.bias_constraint = bias_constraint
        self.reuse = reuse
        self.trainable = trainable
        self.name = name

    def __call__( self, inputs ):
        self.w = self.name + "_w"
        self.b = self.name + "_b"
        w_attr = fluid.ParamAttr( name = self.w,
                                  initializer = self.kernel_init,
                                  regularizer = self.kernel_regu,
                                  trainable = self.trainable )
        b_attr = fluid.ParamAttr( name = self.b,
                                  initializer = self.bias_init,
                                  regularizer = self.bias_regu,
                                  trainable = self.trainable )
        self.layer_out = fluid.layers.fc( inputs,
                                          self.units,
                                          num_flatten_dims = 1,
                                          act = None,
                                          param_attr = w_attr,
                                          bias_attr = b_attr,
                                          name = self.name )
        if self.activation is not None:
            self.layer_out = self.activation( self.layer_out )
        return self.layer_out

    def __repr__( self ):
        return self.name

    def get_weights( self, sess = None ):
        assert ( self.w is not None ) and ( self.b is not None ), "weights in the dense layer should be initialized"
        # place = fluid.CPUPlace()
        # exe = fluid.Executor(place)
        # exe.run(fluid.default_startup_program())
        w = fluid.global_scope().find_var( self.w ).get_tensor()
        b = fluid.global_scope().find_var( self.b ).get_tensor()
        return np.array( w ), np.array( b )

class ArcLinear( object ):
    def __init__( self, in_size, out_size, m = 4, phiflag = True ):
        self.in_size = in_size
        self.out_size = out_size
        matrix = np.random.uniform( -1.0, 1.0, [ in_size, out_size ] )
        norm = np.linalg.norm( matrix, 2, 1, True )
        matrix = matrix / norm
        param_attr = fluid.ParamAttr( trainable = False, name = "eye",
                                      initializer = fluid.initializer.NumpyArrayInitializer( matrix ) )
        self.weight = fluid.layers.create_parameter( [ in_size, out_size ],
                                                     "float32",
                                                     name = "weight",
                                                     attr = param_attr,
                                                     is_bias = False )

    def __call__( self, inputs ):
        x = inputs
        ww = fluid.layers.l2_normalize( self.weight, axis = 1, name = "weight norm" )
        cos_theta = fluid.layers.mul( x, ww )
        return cos_theta * s

class ArcLoss( object ):
    # applies the angular margins (m1, m2, m3) to the target-class logits before softmax
    def __init__( self, gamma = 0., class_num = 10575 ):
        self.class_size = class_num

    def __call__( self, inputs, target ):
        cos_theta = inputs
        cos_theta = cos_theta / s
        truth_cos_theta = fluid.layers.gather( cos_theta, target )
        truth_theta = fluid.layers.acos( truth_cos_theta )
        truth_theta = m1 * truth_theta + m2
        truth_theta = fluid.layers.cos( truth_theta )
        truth_theta -= m3
        diff = truth_theta - truth_cos_theta
        diff = fluid.layers.reshape( diff, [ -1, 1 ], inplace = True )
        diff = fluid.layers.expand( diff, [ 1, self.class_size ] )
        index = fluid.layers.one_hot( target, self.class_size )
        index = fluid.layers.cast( index, "float32" )
        diff = fluid.layers.cast( diff, "float32" )
        # index = fluid.layers.reshape( index, [ -1, self.class_size ], inplace = True )
        print( "inputs shape", inputs.shape )
        print( "index shape", index.shape )
        print( "diff shape", diff.shape )
        print( "cos_theta shape", cos_theta.shape )
        # this is the op that raises the shape-mismatch error in the traceback below
        index = fluid.layers.elementwise_mul( index, diff, axis = 0 )
        cos_theta += index
        cos_theta *= s
        return cos_theta

sample_pos = fluid.layers.data( name = "sample_pos", shape = [ 125, 125, 3 ],
                                dtype = "float32", append_batch_size = True,
                                stop_gradient = False )
label_pos = fluid.layers.data( name = "label_pos", shape = [ 1 ], dtype = "int32", append_batch_size = True )
lr = fluid.layers.data( name = "lr", shape = [ 1 ], dtype = "float32", append_batch_size = False )

conv_1_1_obj = BatchNormConv2D( 64,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 2, stride_v = 2,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_1_1" )
conv_1_2_obj = BatchNormConv2D( 64,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_1_2" )
conv_1_3_obj = BatchNormConv2D( 64,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_1_3" )
conv_2_1_obj = BatchNormConv2D( 128,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 2, stride_v = 2,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_2_1" )
conv_2_2_obj = BatchNormConv2D( 128,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_2_2" )
conv_2_3_obj = BatchNormConv2D( 128,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_2_3" )
conv_2_4_obj = BatchNormConv2D( 128,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_2_4" )
conv_2_5_obj = BatchNormConv2D( 128,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_2_5" )
conv_3_1_obj = BatchNormConv2D( 256,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 2, stride_v = 2,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_3_1" )
conv_3_2_obj = BatchNormConv2D( 256,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_3_2" )
conv_3_3_obj = BatchNormConv2D( 256,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_3_3" )
conv_3_4_obj = BatchNormConv2D( 256,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_3_4" )
conv_3_5_obj = BatchNormConv2D( 256,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_3_5" )
conv_3_6_obj = BatchNormConv2D( 256,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_3_6" )
conv_3_7_obj = BatchNormConv2D( 256,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_3_7" )
conv_3_8_obj = BatchNormConv2D( 256,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_3_8" )
conv_3_9_obj = BatchNormConv2D( 256,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_3_9" )
conv_4_1_obj = BatchNormConv2D( 512,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 2, stride_v = 2,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_4_1" )
conv_4_2_obj = BatchNormConv2D( 512,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_4_2" )
conv_4_3_obj = BatchNormConv2D( 512,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 1, stride_v = 1,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_4_3" )
conv_4_4_obj = BatchNormConv2D( 512,
                                kernel_w = 3, kernel_h = 3,
                                stride_h = 2, stride_v = 2,
                                pad_h = 1, pad_v = 1,
                                activation = leaky_relu,
                                name = "conv_4_4" )
pre_embed_obj = Dense( 512, activation = leaky_relu, name = "pre_embed" )
embed_obj = ArcLinear( 512, 10575 )
loss_obj = ArcLoss( class_num = 10575 )
out = conv_1_1_obj( sample_pos, True )
out = out + conv_1_3_obj( conv_1_2_obj( out ) )
out = conv_2_1_obj( out )
out = out + conv_2_3_obj( conv_2_2_obj( out ) )
out = out + conv_2_5_obj( conv_2_4_obj( out ) )
out = conv_3_1_obj( out )
out = out + conv_3_3_obj( conv_3_2_obj( out ) )
out = out + conv_3_5_obj( conv_3_4_obj( out ) )
out = out + conv_3_7_obj( conv_3_6_obj( out ) )
out = out + conv_3_9_obj( conv_3_8_obj( out ) )
out = conv_4_1_obj( out )
out_tmp = conv_4_3_obj( conv_4_2_obj( out ) )
print( "HAHAHA", out.shape, out_tmp.shape )
out = conv_4_4_obj( out + out_tmp )
embed = pre_embed_obj( out )
logit = embed_obj( embed )
logit = loss_obj( logit, label_pos )
loss = fluid.layers.softmax_with_cross_entropy( logit, fluid.layers.cast( label_pos, "int64" ), soft_label = False, axis = 1)
loss = fluid.layers.mean( loss )
startup_program = fluid.default_startup_program()
main_program = fluid.default_main_program()
test_program = main_program.clone( for_test = True )
optim = fluid.optimizer.AdamOptimizer( lr, beta1 = 0.9, beta2 = 0.99 )
_, grad_list = optim.minimize( loss )
sess.run( fluid.default_startup_program() )
bs = np.random.normal( size = (8, 125, 125, 3) ).astype( np.float32 )
bl = np.random.randint( 10575, size = ( 8, 1 ) ).astype( np.int32 )
learningrate = 0.001
print( bs.shape, bl.shape )
[ loss ] = sess.run( program = main_program,
                     feed = { sample_pos.name : bs,
                              label_pos.name : bl,
                              lr.name : learningrate },
                     fetch_list = [ loss.name ] )
print( loss )
```
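All four shapes printed inside `ArcLoss.__call__` come out as (-1, 10575) at graph-build time, so the mismatch only shows up once the program actually runs. As a debugging aid (a sketch only: the message strings are mine, and it assumes `fluid.layers.Print` behaves as documented in 1.5), the two operands could be dumped at run time right before the failing op inside `ArcLoss.__call__`:

```python
# Hypothetical debugging aid, not part of the original repro:
# print the runtime shape of both operands just before elementwise_mul.
index = fluid.layers.Print( index, message = "index (runtime)", summarize = 10 )
diff = fluid.layers.Print( diff, message = "diff (runtime)", summarize = 10 )
index = fluid.layers.elementwise_mul( index, diff, axis = 0 )
```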
Error output:

```
inputs shape (-1, 10575)
index shape (-1, 10575)
diff shape (-1, 10575)
cos_theta shape (-1, 10575)
W0712 14:01:24.402220 14020 device_context.cc:259] Please NOTE: device: 0, CUDA Capability: 61, Driver API Version: 9.1, Runtime API Version: 9.0
W0712 14:01:24.404572 14020 device_context.cc:267] device: 0, cuDNN Version: 7.3.
(8, 125, 125, 3) (8, 1)
Traceback (most recent call last):
File "debug.py", line 426, in <module>
fetch_list = [ loss.name ] )
File "/home/steven/.local/lib/python3.6/site-packages/paddle/fluid/executor.py", line 650, in run
use_program_cache=use_program_cache)
File "/home/steven/.local/lib/python3.6/site-packages/paddle/fluid/executor.py", line 748, in _run
exe.run(program.desc, scope, 0, True, True, fetch_var_name)
paddle.fluid.core_avx.EnforceNotMet: Invoke operator elementwise_mul error.
Python Callstacks:
File "/home/steven/.local/lib/python3.6/site-packages/paddle/fluid/framework.py", line 1748, in append_op
attrs=kwargs.get("attrs", None))
File "/home/steven/.local/lib/python3.6/site-packages/paddle/fluid/layer_helper.py", line 43, in append_op
return self.main_program.current_block().append_op(*args, **kwargs)
File "/home/steven/.local/lib/python3.6/site-packages/paddle/fluid/layers/nn.py", line 9831, in _elementwise_op
'use_mkldnn': use_mkldnn})
File "/home/steven/.local/lib/python3.6/site-packages/paddle/fluid/layers/nn.py", line 9892, in elementwise_mul
return _elementwise_op(LayerHelper('elementwise_mul', **locals()))
File "debug.py", line 234, in __call__
index = fluid.layers.elementwise_mul( index, diff, axis = 0 )
File "debug.py", line 404, in <module>
logit = loss_obj( logit, label_pos )
C++ Callstacks:
Enforce failed. Expected x_dims[i + axis] == y_dims[i], but received x_dims[i + axis]:8 != y_dims[i]:84600.
Broadcast dimension mismatch. at [/paddle/paddle/fluid/operators/elementwise/elementwise_op_function.h:63]
PaddlePaddle Call Stacks:
0 0x7fb3c7401818p void paddle::platform::EnforceNotMet::Init<std::string>(std::string, char const*, int) + 360
1 0x7fb3c7401b67p paddle::platform::EnforceNotMet::EnforceNotMet(std::string const&, char const*, int) + 87
2 0x7fb3c7f3995cp paddle::operators::get_mid_dims(paddle::framework::DDim const&, paddle::framework::DDim const&, int, int*, int*, int*) + 364
3 0x7fb3c8bb4fa5p void paddle::operators::ElementwiseComputeEx<paddle::operators::MulFunctor<float>, paddle::platform::CUDADeviceContext, float, float>(paddle::framework::ExecutionContext const&, paddle::framework::Tensor const*, paddle::framework::Tensor const*, int, paddle::operators::MulFunctor<float>, paddle::framework::Tensor*) + 421
4 0x7fb3c8bb5793p void paddle::operators::default_elementwise_mul<paddle::platform::CUDADeviceContext, float>(paddle::framework::ExecutionContext const&, paddle::framework::Tensor const*, paddle::framework::Tensor const*, paddle::framework::Tensor*) + 115
5 0x7fb3c8bb5afbp paddle::operators::ElementwiseMulKernel<paddle::platform::CUDADeviceContext, float>::Compute(paddle::framework::ExecutionContext const&) const + 811
6 0x7fb3c8bb5f83p std::_Function_handler<void (paddle::framework::ExecutionContext const&), paddle::framework::OpKernelRegistrarFunctor<paddle::platform::CUDAPlace, false, 0ul, paddle::operators::ElementwiseMulKernel<paddle::platform::CUDADeviceContext, float>, paddle::operators::ElementwiseMulKernel<paddle::platform::CUDADeviceContext, double>, paddle::operators::ElementwiseMulKernel<paddle::platform::CUDADeviceContext, int>, paddle::operators::ElementwiseMulKernel<paddle::platform::CUDADeviceContext, long>, paddle::operators::ElementwiseMulKernel<paddle::platform::CUDADeviceContext, paddle::platform::float16> >::operator()(char const*, char const*, int) const::{lambda(paddle::framework::ExecutionContext const&)#1}>::_M_invoke(std::_Any_data const&, paddle::framework::ExecutionContext const&) + 35
7 0x7fb3c935c6e7p paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&, paddle::framework::RuntimeContext*) const + 375
8 0x7fb3c935cac1p paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) const + 529
9 0x7fb3c935a0bcp paddle::framework::OperatorBase::Run(paddle::framework::Scope const&, boost::variant<paddle::platform::CUDAPlace, paddle::platform::CPUPlace, paddle::platform::CUDAPinnedPlace, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_, boost::detail::variant::void_> const&) + 332
10 0x7fb3c758b48ep paddle::framework::Executor::RunPreparedContext(paddle::framework::ExecutorPrepareContext*, paddle::framework::Scope*, bool, bool, bool) + 382
11 0x7fb3c758e52fp paddle::framework::Executor::Run(paddle::framework::ProgramDesc const&, paddle::framework::Scope*, int, bool, bool, std::vector<std::string, std::allocator<std::string> > const&, bool) + 143
12 0x7fb3c73f297dp
13 0x7fb3c7433cb6p
14 0x56204cp _PyCFunction_FastCallDict + 860
15 0x4f88bap
16 0x4f98c7p _PyEval_EvalFrameDefault + 1127
17 0x4f6128p
18 0x4f7d60p
19 0x4f876dp
20 0x4fa6c0p _PyEval_EvalFrameDefault + 4704
21 0x4f6128p
22 0x4f7d60p
23 0x4f876dp
24 0x4fa6c0p _PyEval_EvalFrameDefault + 4704
25 0x4f6128p
26 0x4f9023p PyEval_EvalCode + 35
27 0x6415b2p
28 0x64166ap PyRun_FileExFlags + 154
29 0x643730p PyRun_SimpleFileExFlags + 400
30 0x62b26ep Py_Main + 1438
31 0x4b4cb0p main + 224
32 0x7fb414df7b97p __libc_start_main + 231
33 0x5bdf6ap _start + 42
```
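For reference, the two numbers in the C++ message line up with the constants in the script: the first operand (`index`) has 8 rows (the batch size), while the second (`diff`) has 84600 = 8 × 10575 rows (batch size × class_num). That is consistent with `fluid.layers.gather` returning whole rows of `cos_theta` (8 × 10575 values) rather than one value per sample, so that `reshape( diff, [ -1, 1 ] )` followed by `expand` produces 84600 rows. A minimal sketch that triggers the same broadcast check in isolation (illustration only, with made-up CPU feeds; it is not the original model):

```python
import numpy as np
import paddle.fluid as fluid

# Two 2-D inputs whose leading dimensions disagree the same way as in the report.
x = fluid.layers.data( name = "x", shape = [ 10575 ], dtype = "float32" )  # fed as (8, 10575)
y = fluid.layers.data( name = "y", shape = [ 10575 ], dtype = "float32" )  # fed as (84600, 10575)
out = fluid.layers.elementwise_mul( x, y, axis = 0 )  # needs x.shape[0] == y.shape[0] at run time

exe = fluid.Executor( fluid.CPUPlace() )
exe.run( fluid.default_startup_program() )
exe.run( feed = { "x": np.ones( ( 8, 10575 ), np.float32 ),
                  "y": np.ones( ( 84600, 10575 ), np.float32 ) },
         fetch_list = [ out ] )
# -> Enforce failed. Expected x_dims[i + axis] == y_dims[i] ... 8 != 84600
```

If the intent was to keep only the target-class cosine for each sample as a (batch, 1) tensor, keeping that leading dimension at the batch size (for example by reducing `cos_theta` against the one-hot mask instead of gathering whole rows) should make both operands of `elementwise_mul` agree, but I have not verified that against the full training script.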