diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp index 24f29a06a904889ae74df3438e29e14f677d2aa9..40edf7aa1ba615603f64700559f0e6dc75773968 100644 --- a/modules/dnn/src/dnn.cpp +++ b/modules/dnn/src/dnn.cpp @@ -1233,12 +1233,13 @@ struct Net::Impl } } - // For now, OpenCL target only support fusion with activation of ReLU/ChannelsPReLU + // For now, OpenCL target only support fusion with activation of ReLU/ChannelsPReLU/Power if ( preferableTarget != DNN_TARGET_OPENCL || (preferableTarget == DNN_TARGET_OPENCL && nextData && (!nextData->type.compare("ReLU") || - !nextData->type.compare("ChannelsPReLU"))) ) + !nextData->type.compare("ChannelsPReLU") || + !nextData->type.compare("Power"))) ) { Ptr nextActivLayer; @@ -1253,6 +1254,78 @@ struct Net::Impl printf_(("\tfused with %s\n", nextActivLayer->name.c_str())); activData->skipFlags[DNN_BACKEND_DEFAULT] = true; ld.outputBlobs = layers[lpNext.lid].outputBlobs; + + if ( preferableTarget == DNN_TARGET_OPENCL ) + { + nextData = &layers[activData->consumers[0].lid]; + lpNext = LayerPin(activData->consumers[0].lid, 0); + } + } + } + + // fuse convlution layer followed by eltwise + relu + if ( preferableTarget == DNN_TARGET_OPENCL ) + { + Ptr nextEltwiseLayer; + if( nextData ) + nextEltwiseLayer = nextData->layerInstance.dynamicCast(); + + if( !nextEltwiseLayer.empty() && pinsToKeep.count(lpNext) == 0 ) + { + LayerData *eltwiseData = nextData; + // go down from the second input and find the first non-skipped layer. + LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[1].lid]; + while (downLayerData->skipFlags[DNN_BACKEND_DEFAULT]) + { + downLayerData = &layers[downLayerData->inputBlobsId[0].lid]; + } + + // second input layer is current layer. + if ( ld.id == downLayerData->id ) + { + // go down from the first input and find the first non-skipped layer + downLayerData = &layers[eltwiseData->inputBlobsId[0].lid]; + while (downLayerData->skipFlags[DNN_BACKEND_DEFAULT]) + { + if ( !downLayerData->type.compare("Eltwise") ) + downLayerData = &layers[downLayerData->inputBlobsId[1].lid]; + else + downLayerData = &layers[downLayerData->inputBlobsId[0].lid]; + } + + Ptr convLayer; + if( downLayerData ) + convLayer = downLayerData->layerInstance.dynamicCast(); + + // first input layer is convolution layer + if( !convLayer.empty() ) + { + // fuse eltwise + activation layer + LayerData *firstConvLayerData = downLayerData; + { + nextData = &layers[eltwiseData->consumers[0].lid]; + lpNext = LayerPin(eltwiseData->consumers[0].lid, 0); + Ptr nextActivLayer; + if( nextData ) + nextActivLayer = nextData->layerInstance.dynamicCast(); + + if( !nextActivLayer.empty() && pinsToKeep.count(lpNext) == 0 && + (!nextData->type.compare("ReLU") || + !nextData->type.compare("ChannelsPReLU") || + !nextData->type.compare("Power")) && + currLayer->setActivation(nextActivLayer) ) + { + CV_Assert(firstConvLayerData->outputBlobs.size() == 1 && ld.inputBlobs.size() == 1); + ld.inputBlobs.push_back(&firstConvLayerData->outputBlobs[0]); + printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str())); + printf_(("\tfused with %s\n", nextActivLayer->name.c_str())); + eltwiseData->skipFlags[DNN_BACKEND_DEFAULT] = true; + nextData->skipFlags[DNN_BACKEND_DEFAULT] = true; + ld.outputBlobs = layers[lpNext.lid].outputBlobs; + } + } + } + } } } } diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp index b2f4ad9ad0577de7bf57a38517dd7590be19eea4..579b8f5a5cfa2ee248e3265d8af338a05bdce0c4 100644 --- a/modules/dnn/src/layers/convolution_layer.cpp +++ b/modules/dnn/src/layers/convolution_layer.cpp @@ -142,6 +142,9 @@ public: } }; + +#define IS_POWER_LAYER(layer) \ + (!layer.empty() && !layer->type.compare("Power")) //TODO: simultaneously convolution and bias addition for cache optimization class ConvolutionLayerImpl : public BaseConvolutionLayerImpl { @@ -161,6 +164,7 @@ public: bool newWeightAndBias; bool newActiv; ocl4dnnFusedActiv_t activType; + float power; #endif ConvolutionLayerImpl() { @@ -169,6 +173,7 @@ public: newWeightAndBias = false; newActiv = false; activType = OCL4DNN_CONV_FUSED_ACTIV_NONE; + power = 0.f; #endif } @@ -225,6 +230,22 @@ public: #ifdef HAVE_OPENCL newActiv = true; activType = OCL4DNN_CONV_FUSED_ACTIV_NONE; + + if (preferableTarget == DNN_TARGET_OPENCL) + { + Ptr activ_power = activ.dynamicCast(); + if (!activ_power.empty()) + { + if (activ_power->scale != 1.f || activ_power->shift != 0.f) + newWeightAndBias = true; + + if (activ_power->scale != 1.f) + weightsMat.release(); + + power = activ_power->power; + activType = OCL4DNN_CONV_FUSED_ACTIV_POWER; + } + } #endif return !activ.empty(); } @@ -727,11 +748,12 @@ public: biasvec[k] = biasMat.at(k); } - if( !bnorm.empty() || !scaleLayer.empty() ) + if( !bnorm.empty() || !scaleLayer.empty() || IS_POWER_LAYER(activ)) { Mat scale, shift, scale2, shift2; const float *scaleptr = 0, *shiftptr = 0; const float *scaleptr2 = 0, *shiftptr2 = 0; + float a = 1.f, b = 0.f; if( !bnorm.empty() ) { @@ -758,7 +780,14 @@ public: } } - if (shiftptr || shiftptr2) + if( IS_POWER_LAYER(activ) ) + { + Ptr activ_power = activ.dynamicCast(); + a = activ_power->scale; + b = activ_power->shift; + } + + if (shiftptr || shiftptr2 || b != 0.f) fusedBias = true; for( int i = 0; i < outCn; i++ ) @@ -771,9 +800,9 @@ public: int j, wcols = weightsMat.cols; for( j = 0; j < wcols; j++ ) - w_i[j] *= (s1*s2); + w_i[j] *= (s1*s2*a); - biasvec[i] = biasvec[i]*(s1*s2) + (delta1*s2 + delta2); + biasvec[i] = biasvec[i]*(s1*s2*a) + (delta1*s2*a + delta2*a + b); } } biasvec[outCn] = biasvec[outCn+1] = biasvec[outCn-1]; @@ -827,10 +856,15 @@ public: CV_Assert(!reluslope.empty()); convolutionOp->setActivPReLU(true, reluslope); } + else if ( activType == OCL4DNN_CONV_FUSED_ACTIV_POWER) + { + convolutionOp->setActivPower(true, power); + } else { convolutionOp->setActivReLU(false, 0); convolutionOp->setActivPReLU(false, reluslope); + convolutionOp->setActivPower(false, 1.f); } newActiv = false; } @@ -840,6 +874,7 @@ public: int batch_size = inpMat.size[0]; return convolutionOp->Forward(inpMat, + inputs.size() == 2 ? inputs[1] : UMat(), umat_blobs[0], (hasBias() || fusedBias) ? umat_blobs[1] : UMat(), outMat, diff --git a/modules/dnn/src/ocl4dnn/include/default_kernel_config.hpp b/modules/dnn/src/ocl4dnn/include/default_kernel_config.hpp index b3c5d9c560b0178583f823635d9de2c16dd62769..de636350bd6aad97852ec83c007b90eb35c1e4c8 100644 --- a/modules/dnn/src/ocl4dnn/include/default_kernel_config.hpp +++ b/modules/dnn/src/ocl4dnn/include/default_kernel_config.hpp @@ -140,154 +140,154 @@ NULL platform behavior clCreateContextFromType(NULL, CL_DEVICE_TYPE_CUSTOM) No platform clCreateContextFromType(NULL, CL_DEVICE_TYPE_ALL) No platform ********************************************************************************/ -"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224_activ1","2 7 16 2 1 1 16 1 0 ", -"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192_activ1","6 4 16 2 1 1 16 1 0 ", -"EU72_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ", -"EU72_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13_activ0","1 1 1 4 1 1 1 0 1 ", -"EU72_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1","4 2 16 2 1 1 16 1 0 ", -"EU72_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32_activ2","3 3 16 2 1 1 16 1 0 ", -"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1","4 4 16 2 1 1 16 1 0 ", -"EU72_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288_activ1","2 7 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1","2 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16_activ1","12 2 8 2 1 1 8 1 0 ", -"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256_activ1","2 7 16 2 1 1 16 1 0 ", -"EU72_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16_activ2","2 8 16 2 1 1 16 1 0 ", -"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144_activ1","8 3 8 2 1 1 8 1 0 ", -"EU72_k2x2_cn16_g1_s2x2_d1x1_b1_in256x256_p0x0_num1_M16_activ2","4 3 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ0","1 8 32 5 1 8 1 1 0 ", -"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1","4 4 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn32_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M128_activ0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192_activ1_eltwise0","6 4 16 2 1 1 16 1 0 ", +"EU72_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32_activ2_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13_activ0_eltwise0","1 1 1 4 1 1 1 0 1 ", +"EU72_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1_eltwise0","4 2 16 2 1 1 16 1 0 ", +"EU72_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32_activ2_eltwise0","3 3 16 2 1 1 16 1 0 ", +"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1_eltwise0","4 4 16 2 1 1 16 1 0 ", +"EU72_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16_activ1_eltwise0","12 2 8 2 1 1 8 1 0 ", +"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU72_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16_activ2_eltwise0","2 8 16 2 1 1 16 1 0 ", +"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144_activ1_eltwise0","8 3 8 2 1 1 8 1 0 ", +"EU72_k2x2_cn16_g1_s2x2_d1x1_b1_in256x256_p0x0_num1_M16_activ2_eltwise0","4 3 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ0_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1_eltwise0","4 4 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn32_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M128_activ3_eltwise0","1 8 32 5 1 8 1 1 0 ", "EU72_k1x1_cn128_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M512_activ1_eltwise1","1 16 32 5 1 16 1 1 0 ", -"EU72_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1","2 4 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M1024_activ0","1 16 32 5 1 16 1 1 0 ", -"EU72_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5_activ0","2 3 8 2 1 1 8 1 0 ", -"EU72_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128_activ1","4 4 8 2 1 1 8 1 0 ", -"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1","4 6 8 2 1 1 8 1 0 ", -"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24_activ1","12 1 8 2 1 1 8 1 0 ", -"EU72_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192_activ1","2 8 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ0","1 16 32 5 1 16 1 1 0 ", -"EU72_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128_activ1","2 6 8 2 1 1 8 1 0 ", -"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384_activ1","2 7 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1","8 3 8 2 1 1 8 1 0 ", -"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k2x2_cn64_g1_s2x2_d1x1_b1_in128x128_p0x0_num1_M32_activ2","8 3 16 2 1 1 16 1 0 ", -"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32_activ1","4 2 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn2048_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M512_activ1","4 7 8 2 1 1 8 1 0 ", -"EU72_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0","1 16 32 5 1 16 1 1 0 ", -"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ2","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16_activ1","8 2 8 2 1 1 8 1 0 ", -"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32_activ0","4 6 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M32_activ2","2 8 32 5 1 8 1 1 0 ", -"EU72_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224_activ1","2 5 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ0","1 16 32 5 1 16 1 1 0 ", -"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1","4 6 8 2 1 1 8 1 0 ", -"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192_activ1","6 4 16 2 1 1 16 1 0 ", -"EU72_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1","4 2 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208_activ1","2 8 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M16_activ2","12 2 8 2 1 1 8 1 0 ", -"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M512_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32_activ2","4 7 16 2 1 1 16 1 0 ", -"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192_activ1","2 7 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1","8 3 8 2 1 1 8 1 0 ", -"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M2048_activ0","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96_activ1","4 4 16 2 1 1 16 1 0 ", -"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1","2 7 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4_activ0","4 2 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24_activ1","12 1 8 2 1 1 8 1 0 ", -"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16_activ1","12 1 8 2 1 1 8 1 0 ", +"EU72_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1_eltwise0","2 4 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M1024_activ0_eltwise0","1 16 32 5 1 16 1 1 0 ", +"EU72_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5_activ0_eltwise0","2 3 8 2 1 1 8 1 0 ", +"EU72_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128_activ1_eltwise0","4 4 8 2 1 1 8 1 0 ", +"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1_eltwise0","4 6 8 2 1 1 8 1 0 ", +"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24_activ1_eltwise0","12 1 8 2 1 1 8 1 0 ", +"EU72_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192_activ1_eltwise0","2 8 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M256_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ0_eltwise0","1 16 32 5 1 16 1 1 0 ", +"EU72_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128_activ1_eltwise0","2 6 8 2 1 1 8 1 0 ", +"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1_eltwise0","8 3 8 2 1 1 8 1 0 ", +"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k2x2_cn64_g1_s2x2_d1x1_b1_in128x128_p0x0_num1_M32_activ2_eltwise0","8 3 16 2 1 1 16 1 0 ", +"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32_activ1_eltwise0","4 2 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn2048_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M512_activ1_eltwise0","4 7 8 2 1 1 8 1 0 ", +"EU72_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0_eltwise0","1 16 32 5 1 16 1 1 0 ", +"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ2_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16_activ1_eltwise0","8 2 8 2 1 1 8 1 0 ", +"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32_activ0_eltwise0","4 6 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M32_activ2_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU72_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224_activ1_eltwise0","2 5 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ0_eltwise0","1 16 32 5 1 16 1 1 0 ", +"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1_eltwise0","4 6 8 2 1 1 8 1 0 ", +"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192_activ1_eltwise0","6 4 16 2 1 1 16 1 0 ", +"EU72_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1_eltwise0","4 2 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208_activ1_eltwise0","2 8 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M16_activ2_eltwise0","12 2 8 2 1 1 8 1 0 ", +"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M512_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32_activ2_eltwise0","4 7 16 2 1 1 16 1 0 ", +"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1_eltwise0","8 3 8 2 1 1 8 1 0 ", +"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M2048_activ0_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96_activ1_eltwise0","4 4 16 2 1 1 16 1 0 ", +"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4_activ0_eltwise0","4 2 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24_activ1_eltwise0","12 1 8 2 1 1 8 1 0 ", +"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16_activ1_eltwise0","12 1 8 2 1 1 8 1 0 ", "EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ1_eltwise1","1 16 32 5 1 16 1 1 0 ", "EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M2048_activ1_eltwise1","1 8 32 5 1 8 1 1 0 ", -"EU72_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64_activ0","4 1 16 2 1 1 16 1 0 ", -"EU72_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ", -"EU72_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2_activ0","1 4 16 2 1 1 16 1 0 ", -"EU72_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1","4 2 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48_activ1","4 2 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M4_activ2","8 3 8 2 1 1 8 1 0 ", -"EU72_k1x1_cn16_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M4_activ2","12 2 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16_activ1","12 1 8 2 1 1 8 1 0 ", -"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1","12 1 8 2 1 1 8 1 0 ", -"EU72_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1","8 1 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48_activ1","4 6 8 2 1 1 8 1 0 ", -"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1","12 2 8 2 1 1 8 1 0 ", -"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1","4 3 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ0","8 3 16 2 1 1 16 1 0 ", -"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208_activ1","6 2 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ0","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn4_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M16_activ0","1 8 32 5 1 8 1 1 0 ", -"EU72_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1","8 3 8 2 1 1 8 1 0 ", -"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1","12 1 8 2 1 1 8 1 0 ", -"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1","4 6 8 2 1 1 8 1 0 ", -"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96_activ1","1 16 32 5 1 16 1 1 0 ", -"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32_activ2","2 6 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1","2 8 32 5 1 8 1 1 0 ", -"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1","6 4 16 2 1 1 16 1 0 ", -"EU72_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4_activ2","2 7 16 2 1 1 16 1 0 ", -"EU72_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1","12 1 8 2 1 1 8 1 0 ", -"EU72_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1","2 5 16 2 1 1 16 1 0 ", +"EU72_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64_activ0_eltwise0","4 1 16 2 1 1 16 1 0 ", +"EU72_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32_activ2_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2_activ0_eltwise0","1 4 16 2 1 1 16 1 0 ", +"EU72_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1_eltwise0","4 2 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48_activ1_eltwise0","4 2 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M4_activ2_eltwise0","8 3 8 2 1 1 8 1 0 ", +"EU72_k1x1_cn16_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M4_activ2_eltwise0","12 2 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16_activ1_eltwise0","12 1 8 2 1 1 8 1 0 ", +"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1_eltwise0","12 1 8 2 1 1 8 1 0 ", +"EU72_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1_eltwise0","8 1 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48_activ1_eltwise0","4 6 8 2 1 1 8 1 0 ", +"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1_eltwise0","12 2 8 2 1 1 8 1 0 ", +"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1_eltwise0","4 3 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ0_eltwise0","8 3 16 2 1 1 16 1 0 ", +"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208_activ1_eltwise0","6 2 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ0_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn4_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M16_activ0_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1_eltwise0","8 3 8 2 1 1 8 1 0 ", +"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1_eltwise0","12 1 8 2 1 1 8 1 0 ", +"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1_eltwise0","4 6 8 2 1 1 8 1 0 ", +"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96_activ1_eltwise0","1 16 32 5 1 16 1 1 0 ", +"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32_activ2_eltwise0","2 6 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU72_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1_eltwise0","6 4 16 2 1 1 16 1 0 ", +"EU72_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4_activ2_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU72_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1_eltwise0","12 1 8 2 1 1 8 1 0 ", +"EU72_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1_eltwise0","2 5 16 2 1 1 16 1 0 ", "EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M1024_activ1_eltwise1","2 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1","4 6 8 2 1 1 8 1 0 ", -"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1","12 1 8 2 1 1 8 1 0 ", -"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1","2 7 16 2 1 1 16 1 0 ", -"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1","8 1 16 2 1 1 16 1 0 ", -"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48_activ1","4 2 16 2 1 1 16 1 0 ", -"EU72_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4_activ0","1 1 1 4 1 1 1 0 1 ", -"EU72_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288_activ1","2 7 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96_activ1","4 3 16 2 1 1 16 1 0 ", -"EU72_k3x3_cn512_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M512_activ1","2 7 8 2 1 1 8 1 0 ", -"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192_activ1","2 5 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1","4 6 8 2 1 1 8 1 0 ", -"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48_activ1","4 6 8 2 1 1 8 1 0 ", -"EU72_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0","1 16 32 5 1 16 1 1 0 ", -"EU72_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M512_activ0","2 8 32 5 1 8 1 1 0 ", -"EU72_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1","2 8 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k1x1_cn1024_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ", -"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32_activ1","4 4 16 2 1 1 16 1 0 ", -"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64_activ1","1 16 32 5 1 16 1 1 0 ", -"EU72_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320_activ1","2 7 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1_eltwise0","4 6 8 2 1 1 8 1 0 ", +"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1_eltwise0","12 1 8 2 1 1 8 1 0 ", +"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1_eltwise0","8 1 16 2 1 1 16 1 0 ", +"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48_activ1_eltwise0","4 2 16 2 1 1 16 1 0 ", +"EU72_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4_activ0_eltwise0","1 1 1 4 1 1 1 0 1 ", +"EU72_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96_activ1_eltwise0","4 3 16 2 1 1 16 1 0 ", +"EU72_k3x3_cn512_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M512_activ1_eltwise0","2 7 8 2 1 1 8 1 0 ", +"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192_activ1_eltwise0","2 5 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1_eltwise0","4 6 8 2 1 1 8 1 0 ", +"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32_activ2_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48_activ1_eltwise0","4 6 8 2 1 1 8 1 0 ", +"EU72_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ3_eltwise0","1 16 32 5 1 16 1 1 0 ", +"EU72_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M512_activ0_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU72_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1_eltwise0","2 8 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k1x1_cn1024_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU72_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32_activ1_eltwise0","4 4 16 2 1 1 16 1 0 ", +"EU72_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64_activ1_eltwise0","1 16 32 5 1 16 1 1 0 ", +"EU72_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", // Below is the information for OpenCL based on which these configurations tuned /******************************************************************************* Number of platforms 1 @@ -427,154 +427,154 @@ NULL platform behavior clCreateContextFromType(NULL, CL_DEVICE_TYPE_CUSTOM) No platform clCreateContextFromType(NULL, CL_DEVICE_TYPE_ALL) No platform ********************************************************************************/ -"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1","8 1 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1","12 1 8 2 1 1 8 1 0 ", +"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1_eltwise0","8 1 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1_eltwise0","12 1 8 2 1 1 8 1 0 ", "EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M1024_activ1_eltwise1","1 8 32 5 1 8 1 1 0 ", -"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96_activ1","4 4 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ0","1 8 32 5 1 8 1 1 0 ", -"EU48_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128_activ1","2 6 8 2 1 1 8 1 0 ", -"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1","4 3 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96_activ1","2 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M1024_activ0","1 16 32 5 1 16 1 1 0 ", -"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96_activ1","4 4 16 2 1 1 16 1 0 ", -"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32_activ1","4 3 16 2 1 1 16 1 0 ", -"EU48_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5_activ0","2 2 8 2 1 1 8 1 0 ", -"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ0","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn4_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M16_activ0","2 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1","4 6 8 2 1 1 8 1 0 ", -"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208_activ1","2 7 16 2 1 1 16 1 0 ", -"EU48_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M64_activ1","2 8 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16_activ1","12 1 8 2 1 1 8 1 0 ", +"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96_activ1_eltwise0","4 4 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ0_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32_activ2_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128_activ1_eltwise0","2 6 8 2 1 1 8 1 0 ", +"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1_eltwise0","4 3 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96_activ1_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M1024_activ0_eltwise0","1 16 32 5 1 16 1 1 0 ", +"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96_activ1_eltwise0","4 4 16 2 1 1 16 1 0 ", +"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32_activ1_eltwise0","4 3 16 2 1 1 16 1 0 ", +"EU48_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5_activ0_eltwise0","2 2 8 2 1 1 8 1 0 ", +"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ0_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn4_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M16_activ0_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1_eltwise0","4 6 8 2 1 1 8 1 0 ", +"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU48_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M64_activ1_eltwise0","2 8 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16_activ1_eltwise0","12 1 8 2 1 1 8 1 0 ", "EU48_k1x1_cn128_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M512_activ1_eltwise1","1 16 32 5 1 16 1 1 0 ", -"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ2","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256_activ1","2 7 16 2 1 1 16 1 0 ", -"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192_activ1","2 7 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k3x3_cn512_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M512_activ1","2 7 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M512_activ0","1 16 32 5 1 16 1 1 0 ", -"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1","4 6 8 2 1 1 8 1 0 ", -"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48_activ1","4 6 8 2 1 1 8 1 0 ", -"EU48_k1x1_cn16_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M4_activ2","8 3 8 2 1 1 8 1 0 ", -"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288_activ1","2 7 16 2 1 1 16 1 0 ", -"EU48_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224_activ1","2 7 16 2 1 1 16 1 0 ", -"EU48_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1","2 7 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M16_activ2","2 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0","1 16 32 5 1 16 1 1 0 ", -"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24_activ1","12 2 8 2 1 1 8 1 0 ", -"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48_activ1","4 6 8 2 1 1 8 1 0 ", -"EU48_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320_activ1","2 7 16 2 1 1 16 1 0 ", -"EU48_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1","8 1 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M128_activ1","2 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1","2 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ0","2 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M32_activ2","2 8 32 5 1 8 1 1 0 ", -"EU48_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1","12 1 8 2 1 1 8 1 0 ", -"EU48_k2x2_cn64_g1_s2x2_d1x1_b1_in128x128_p0x0_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ", -"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32_activ1","4 2 16 2 1 1 16 1 0 ", -"EU48_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13_activ0","1 1 1 4 1 1 1 0 1 ", -"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288_activ1","2 7 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ2_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k3x3_cn512_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M512_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M512_activ0_eltwise0","1 16 32 5 1 16 1 1 0 ", +"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1_eltwise0","4 6 8 2 1 1 8 1 0 ", +"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48_activ1_eltwise0","4 6 8 2 1 1 8 1 0 ", +"EU48_k1x1_cn16_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M4_activ2_eltwise0","8 3 8 2 1 1 8 1 0 ", +"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU48_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU48_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M16_activ2_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0_eltwise0","1 16 32 5 1 16 1 1 0 ", +"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24_activ1_eltwise0","12 2 8 2 1 1 8 1 0 ", +"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48_activ1_eltwise0","4 6 8 2 1 1 8 1 0 ", +"EU48_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU48_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1_eltwise0","8 1 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M128_activ1_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ0_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M32_activ2_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU48_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1_eltwise0","12 1 8 2 1 1 8 1 0 ", +"EU48_k2x2_cn64_g1_s2x2_d1x1_b1_in128x128_p0x0_num1_M32_activ2_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32_activ1_eltwise0","4 2 16 2 1 1 16 1 0 ", +"EU48_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13_activ0_eltwise0","1 1 1 4 1 1 1 0 1 ", +"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", "EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ1_eltwise1","1 16 32 5 1 16 1 1 0 ", -"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn1024_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384_activ1","2 7 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn2048_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M512_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1","4 2 8 2 1 1 8 1 0 ", -"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24_activ1","12 1 8 2 1 1 8 1 0 ", -"EU48_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1","2 4 16 2 1 1 16 1 0 ", -"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192_activ1","6 4 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M4_activ2","8 3 8 2 1 1 8 1 0 ", -"EU48_k2x2_cn16_g1_s2x2_d1x1_b1_in256x256_p0x0_num1_M16_activ2","8 2 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16_activ1","12 1 8 2 1 1 8 1 0 ", -"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1","1 16 32 5 1 16 1 1 0 ", -"EU48_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0","1 16 32 5 1 16 1 1 0 ", -"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192_activ1","14 2 16 2 1 1 16 1 0 ", -"EU48_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192_activ1","2 7 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1","2 7 16 2 1 1 16 1 0 ", -"EU48_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1","4 6 8 2 1 1 8 1 0 ", -"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128_activ1","2 7 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2_activ0","1 2 8 2 1 1 8 1 0 ", -"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ0","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64_activ0","4 1 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64_activ1","2 8 32 5 1 8 1 1 0 ", -"EU48_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1","4 2 8 2 1 1 8 1 0 ", -"EU48_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ0","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48_activ1","4 2 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1","4 4 8 2 1 1 8 1 0 ", +"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn1024_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn2048_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M512_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1_eltwise0","4 2 8 2 1 1 8 1 0 ", +"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24_activ1_eltwise0","12 1 8 2 1 1 8 1 0 ", +"EU48_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1_eltwise0","2 4 16 2 1 1 16 1 0 ", +"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192_activ1_eltwise0","6 4 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M4_activ2_eltwise0","8 3 8 2 1 1 8 1 0 ", +"EU48_k2x2_cn16_g1_s2x2_d1x1_b1_in256x256_p0x0_num1_M16_activ2_eltwise0","8 2 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16_activ1_eltwise0","12 1 8 2 1 1 8 1 0 ", +"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1_eltwise0","1 16 32 5 1 16 1 1 0 ", +"EU48_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M256_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ3_eltwise0","1 16 32 5 1 16 1 1 0 ", +"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192_activ1_eltwise0","14 2 16 2 1 1 16 1 0 ", +"EU48_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32_activ2_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU48_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32_activ2_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1_eltwise0","4 6 8 2 1 1 8 1 0 ", +"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2_activ0_eltwise0","1 2 8 2 1 1 8 1 0 ", +"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ0_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64_activ0_eltwise0","4 1 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64_activ1_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU48_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1_eltwise0","4 2 8 2 1 1 8 1 0 ", +"EU48_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ0_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48_activ1_eltwise0","4 2 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1_eltwise0","4 4 8 2 1 1 8 1 0 ", "EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M2048_activ1_eltwise1","2 8 32 5 1 8 1 1 0 ", -"EU48_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32_activ2","8 3 16 2 1 1 16 1 0 ", -"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1","6 4 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1","8 3 8 2 1 1 8 1 0 ", -"EU48_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M512_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208_activ1","2 7 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4_activ0","4 4 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn32_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M128_activ0","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4_activ0","1 1 1 4 1 1 1 0 1 ", -"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16_activ2","2 8 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1","2 8 32 5 1 8 1 1 0 ", -"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48_activ1","4 2 16 2 1 1 16 1 0 ", -"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1","4 2 16 2 1 1 16 1 0 ", -"EU48_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1","4 2 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1","8 1 16 2 1 1 16 1 0 ", -"EU48_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32_activ0","1 8 32 5 1 8 1 1 0 ", -"EU48_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224_activ1","2 7 16 2 1 1 16 1 0 ", -"EU48_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1","2 7 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1","4 6 8 2 1 1 8 1 0 ", -"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M2048_activ0","2 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4_activ2","10 2 8 2 1 1 8 1 0 ", -"EU48_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1","2 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16_activ1","12 1 8 2 1 1 8 1 0 ", -"EU48_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32_activ2","2 8 16 2 1 1 16 1 0 ", -"EU48_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192_activ1","2 8 16 2 1 1 16 1 0 ", -"EU48_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128_activ1","4 3 16 2 1 1 16 1 0 ", -"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128_activ1","2 8 32 5 1 8 1 1 0 ", -"EU48_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96_activ1","1 8 32 5 1 8 1 1 0 ", -"EU48_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ", -"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32_activ2_eltwise0","8 3 16 2 1 1 16 1 0 ", +"EU48_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1_eltwise0","6 4 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1_eltwise0","8 3 8 2 1 1 8 1 0 ", +"EU48_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M512_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4_activ0_eltwise0","4 4 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn32_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M128_activ3_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4_activ0_eltwise0","1 1 1 4 1 1 1 0 1 ", +"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16_activ2_eltwise0","2 8 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU48_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48_activ1_eltwise0","4 2 16 2 1 1 16 1 0 ", +"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1_eltwise0","4 2 16 2 1 1 16 1 0 ", +"EU48_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1_eltwise0","4 2 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1_eltwise0","8 1 16 2 1 1 16 1 0 ", +"EU48_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32_activ0_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU48_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1_eltwise0","4 6 8 2 1 1 8 1 0 ", +"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M2048_activ0_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4_activ2_eltwise0","10 2 8 2 1 1 8 1 0 ", +"EU48_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16_activ1_eltwise0","12 1 8 2 1 1 8 1 0 ", +"EU48_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32_activ2_eltwise0","2 8 16 2 1 1 16 1 0 ", +"EU48_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192_activ1_eltwise0","2 8 16 2 1 1 16 1 0 ", +"EU48_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128_activ1_eltwise0","4 3 16 2 1 1 16 1 0 ", +"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128_activ1_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU48_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32_activ2_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU48_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", // Below is the information for OpenCL based on which these configurations tuned /******************************************************************************* Number of platforms 1 @@ -714,153 +714,153 @@ NULL platform behavior clCreateContextFromType(NULL, CL_DEVICE_TYPE_CUSTOM) No platform clCreateContextFromType(NULL, CL_DEVICE_TYPE_ALL) No platform ********************************************************************************/ -"EU24_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384_activ1","2 7 16 2 1 1 16 1 0 ", -"EU24_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M256_activ1","2 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24_activ1","8 3 8 2 1 1 8 1 0 ", -"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1","4 4 16 2 1 1 16 1 0 ", -"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112_activ1","2 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32_activ2","8 3 16 2 1 1 16 1 0 ", -"EU24_k2x2_cn16_g1_s2x2_d1x1_b1_in256x256_p0x0_num1_M16_activ2","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1","4 4 16 2 1 1 16 1 0 ", -"EU24_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M16_activ2","2 8 32 5 1 8 1 1 0 ", -"EU24_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5_activ0","2 4 8 2 1 1 8 1 0 ", -"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96_activ1","4 4 16 2 1 1 16 1 0 ", -"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192_activ1","2 7 16 2 1 1 16 1 0 ", -"EU24_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1","2 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224_activ1","2 7 16 2 1 1 16 1 0 ", -"EU24_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32_activ2","10 2 16 2 1 1 16 1 0 ", -"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16_activ1","12 2 8 2 1 1 8 1 0 ", -"EU24_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128_activ1","2 7 16 2 1 1 16 1 0 ", -"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96_activ1","4 4 16 2 1 1 16 1 0 ", -"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ0","2 8 32 5 1 8 1 1 0 ", -"EU24_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4_activ2","2 8 16 2 1 1 16 1 0 ", -"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1","4 2 16 2 1 1 16 1 0 ", -"EU24_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1","2 7 16 2 1 1 16 1 0 ", -"EU24_k2x2_cn64_g1_s2x2_d1x1_b1_in128x128_p0x0_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ", -"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208_activ1","1 8 32 5 1 8 1 1 0 ", +"EU24_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M384_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M256_activ1_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M24_activ1_eltwise0","8 3 8 2 1 1 8 1 0 ", +"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1_eltwise0","4 4 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M112_activ1_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M144_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x5_cn32_g1_s1x1_d1x1_b1_in64x64_p0x2_num1_M32_activ2_eltwise0","8 3 16 2 1 1 16 1 0 ", +"EU24_k2x2_cn16_g1_s2x2_d1x1_b1_in256x256_p0x0_num1_M16_activ2_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1_eltwise0","4 4 16 2 1 1 16 1 0 ", +"EU24_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num2_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M16_activ2_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU24_k3x3_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M5_activ0_eltwise0","2 4 8 2 1 1 8 1 0 ", +"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M96_activ1_eltwise0","4 4 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M192_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU24_k3x3_cn32_g1_s1x1_d2x2_b1_in64x64_p2x2_num1_M32_activ2_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M224_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU24_k3x3_cn32_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M32_activ2_eltwise0","10 2 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M16_activ1_eltwise0","12 2 8 2 1 1 8 1 0 ", +"EU24_k3x3_cn384_g2_s1x1_d1x1_b1_in16x16_p1x1_num1_M128_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M96_activ1_eltwise0","4 4 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M192_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ0_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU24_k3x3_cn4_g1_s1x1_d1x1_b1_in256x256_p1x1_num1_M4_activ2_eltwise0","2 8 16 2 1 1 16 1 0 ", +"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1_eltwise0","4 2 16 2 1 1 16 1 0 ", +"EU24_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU24_k2x2_cn64_g1_s2x2_d1x1_b1_in128x128_p0x0_num1_M32_activ2_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M208_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", "EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M1024_activ1_eltwise1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn2048_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M512_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48_activ1","4 6 8 2 1 1 8 1 0 ", -"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256_activ1","2 7 16 2 1 1 16 1 0 ", -"EU24_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384_activ1","2 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn4_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M16_activ0","2 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1","8 2 8 2 1 1 8 1 0 ", -"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16_activ1","2 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ0","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1","2 7 16 2 1 1 16 1 0 ", -"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32_activ1","4 4 16 2 1 1 16 1 0 ", -"EU24_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4_activ0","1 1 1 4 1 1 1 0 1 ", -"EU24_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1","4 4 16 2 1 1 16 1 0 ", -"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16_activ2","2 4 16 2 1 1 16 1 0 ", -"EU24_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288_activ1","2 7 16 2 1 1 16 1 0 ", -"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1","2 6 16 2 1 1 16 1 0 ", -"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ0","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M4_activ2","8 3 8 2 1 1 8 1 0 ", -"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1","2 7 16 2 1 1 16 1 0 ", -"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1","4 2 16 2 1 1 16 1 0 ", -"EU24_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4_activ0","4 4 8 2 1 1 8 1 0 ", -"EU24_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64_activ0","4 1 16 2 1 1 16 1 0 ", -"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ2","1 8 32 5 1 8 1 1 0 ", -"EU24_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320_activ1","2 7 16 2 1 1 16 1 0 ", -"EU24_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128_activ1","4 3 16 2 1 1 16 1 0 ", -"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16_activ1","12 2 8 2 1 1 8 1 0 ", -"EU24_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ0","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1","4 7 8 2 1 1 8 1 0 ", -"EU24_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M512_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48_activ1","4 2 16 2 1 1 16 1 0 ", -"EU24_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0","1 8 32 5 1 8 1 1 0 ", -"EU24_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192_activ1","14 2 16 2 1 1 16 1 0 ", -"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1","12 2 8 2 1 1 8 1 0 ", -"EU24_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192_activ1","14 2 16 2 1 1 16 1 0 ", -"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16_activ1","8 3 8 2 1 1 8 1 0 ", -"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128_activ1","2 7 16 2 1 1 16 1 0 ", -"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M64_activ1","2 8 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn2048_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M512_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M48_activ1_eltwise0","4 6 8 2 1 1 8 1 0 ", +"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M256_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU24_k3x3_cn32_g1_s1x1_d16x16_b1_in64x64_p16x16_num1_M32_activ2_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M384_activ1_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn4_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M16_activ0_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1_eltwise0","8 2 8 2 1 1 8 1 0 ", +"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M16_activ1_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ0_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M96_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num1_M32_activ1_eltwise0","4 4 16 2 1 1 16 1 0 ", +"EU24_k5x4_cn6_g3_s3x2_d1x1_b1_in128x80_p1x0_num2_M4_activ0_eltwise0","1 1 1 4 1 1 1 0 1 ", +"EU24_k5x5_cn24_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1_eltwise0","4 4 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k3x3_cn16_g1_s1x1_d1x1_b1_in128x128_p1x1_num1_M16_activ2_eltwise0","2 4 16 2 1 1 16 1 0 ", +"EU24_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M288_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1_eltwise0","2 6 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ0_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M4_activ2_eltwise0","8 3 8 2 1 1 8 1 0 ", +"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k3x3_cn192_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M384_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M64_activ1_eltwise0","4 2 16 2 1 1 16 1 0 ", +"EU24_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M4_activ0_eltwise0","4 4 8 2 1 1 8 1 0 ", +"EU24_k11x7_cn3_g1_s3x4_d1x1_b1_in64x64_p3x2_num1_M64_activ0_eltwise0","4 1 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M160_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M16_activ2_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M320_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU24_k5x5_cn96_g2_s1x1_d1x1_b1_in32x32_p2x2_num1_M128_activ1_eltwise0","4 3 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M16_activ1_eltwise0","12 2 8 2 1 1 8 1 0 ", +"EU24_k7x7_cn3_g1_s2x2_d1x1_b1_in224x224_p3x3_num1_M64_activ0_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M384_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1_eltwise0","4 7 8 2 1 1 8 1 0 ", +"EU24_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M512_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M48_activ1_eltwise0","4 2 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ3_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num2_M192_activ1_eltwise0","14 2 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1_eltwise0","12 2 8 2 1 1 8 1 0 ", +"EU24_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M192_activ1_eltwise0","14 2 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M16_activ1_eltwise0","8 3 8 2 1 1 8 1 0 ", +"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M128_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M96_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k3x3_cn64_g1_s1x1_d1x1_b1_in64x64_p1x1_num1_M64_activ1_eltwise0","2 8 16 2 1 1 16 1 0 ", "EU24_k1x1_cn128_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M512_activ1_eltwise1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48_activ1","4 6 8 2 1 1 8 1 0 ", -"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1","4 6 8 2 1 1 8 1 0 ", -"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1","4 6 8 2 1 1 8 1 0 ", -"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192_activ1","14 2 16 2 1 1 16 1 0 ", -"EU24_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24_activ1","12 2 8 2 1 1 8 1 0 ", -"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1","4 6 8 2 1 1 8 1 0 ", -"EU24_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M1024_activ0","2 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn16_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M4_activ2","12 2 8 2 1 1 8 1 0 ", -"EU24_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M32_activ2","2 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn32_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M128_activ0","2 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn1024_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k3x3_cn512_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M512_activ1","2 7 16 2 1 1 16 1 0 ", -"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208_activ1","2 7 16 2 1 1 16 1 0 ", -"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1","4 3 16 2 1 1 16 1 0 ", -"EU24_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32_activ0","1 8 32 5 1 8 1 1 0 ", -"EU24_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2_activ0","1 3 8 2 1 1 8 1 0 ", -"EU24_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224_activ1","2 7 16 2 1 1 16 1 0 ", -"EU24_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M2048_activ0","2 8 32 5 1 8 1 1 0 ", -"EU24_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13_activ0","1 1 1 4 1 1 1 0 1 ", -"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64_activ1","2 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1","12 2 8 2 1 1 8 1 0 ", -"EU24_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ0","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1","4 2 16 2 1 1 16 1 0 ", -"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M48_activ1_eltwise0","4 6 8 2 1 1 8 1 0 ", +"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1_eltwise0","4 6 8 2 1 1 8 1 0 ", +"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M32_activ1_eltwise0","4 6 8 2 1 1 8 1 0 ", +"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num2_M192_activ1_eltwise0","14 2 16 2 1 1 16 1 0 ", +"EU24_k3x3_cn32_g1_s1x1_d8x8_b1_in64x64_p8x8_num1_M32_activ2_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M24_activ1_eltwise0","12 2 8 2 1 1 8 1 0 ", +"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M32_activ1_eltwise0","4 6 8 2 1 1 8 1 0 ", +"EU24_k1x1_cn512_g1_s2x2_d1x1_b1_in32x32_p0x0_num1_M1024_activ0_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn16_g1_s1x1_d1x1_b1_in256x256_p0x0_num1_M4_activ2_eltwise0","12 2 8 2 1 1 8 1 0 ", +"EU24_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M32_activ2_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn32_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M128_activ3_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn480_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M192_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn16_g1_s1x1_d1x1_b1_in128x128_p0x0_num1_M64_activ0_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn1024_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M256_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M96_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k3x3_cn512_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M512_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M144_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k3x3_cn96_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M208_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M96_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k11x11_cn3_g1_s4x4_d1x1_b1_in224x224_p0x0_num1_M96_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k5x5_cn32_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M64_activ1_eltwise0","4 3 16 2 1 1 16 1 0 ", +"EU24_k5x1_cn32_g1_s1x1_d1x1_b0_in64x64_p2x0_num1_M32_activ0_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k4x4_cn3_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M2_activ0_eltwise0","1 3 8 2 1 1 8 1 0 ", +"EU24_k3x3_cn112_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M224_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn1024_g1_s2x2_d1x1_b1_in16x16_p0x0_num1_M2048_activ0_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU24_k3x3_cn3_g1_s2x2_d1x1_b1_in256x256_p1x1_num1_M13_activ0_eltwise0","1 1 1 4 1 1 1 0 1 ", +"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num2_M64_activ1_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num1_M32_activ1_eltwise0","12 2 8 2 1 1 8 1 0 ", +"EU24_k1x1_cn128_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M64_activ0_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M112_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num2_M128_activ1_eltwise0","4 2 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn528_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M128_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num2_M256_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", "EU24_k1x1_cn64_g1_s1x1_d1x1_b1_in64x64_p0x0_num1_M256_activ1_eltwise1","1 16 32 5 1 16 1 1 0 ", -"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288_activ1","2 7 16 2 1 1 16 1 0 ", -"EU24_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1","2 7 16 2 1 1 16 1 0 ", -"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1","2 7 16 2 1 1 16 1 0 ", -"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32_activ1","4 2 16 2 1 1 16 1 0 ", -"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1","4 3 8 2 1 1 8 1 0 ", -"EU24_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M512_activ0","2 8 32 5 1 8 1 1 0 ", -"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48_activ1","4 4 16 2 1 1 16 1 0 ", -"EU24_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320_activ1","2 7 16 2 1 1 16 1 0 ", -"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1","1 8 32 5 1 8 1 1 0 ", -"EU24_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32_activ2","1 8 32 5 1 8 1 1 0 ", +"EU24_k1x1_cn832_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M160_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k3x3_cn144_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M288_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU24_k3x3_cn256_g1_s1x1_d1x1_b1_in16x16_p1x1_num1_M256_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M128_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in32x32_p2x2_num2_M32_activ1_eltwise0","4 2 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn256_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M64_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k5x5_cn48_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M128_activ1_eltwise0","4 3 8 2 1 1 8 1 0 ", +"EU24_k1x1_cn256_g1_s2x2_d1x1_b1_in64x64_p0x0_num1_M512_activ0_eltwise0","2 8 32 5 1 8 1 1 0 ", +"EU24_k5x5_cn16_g1_s1x1_d1x1_b1_in16x16_p2x2_num1_M48_activ1_eltwise0","4 4 16 2 1 1 16 1 0 ", +"EU24_k3x3_cn160_g1_s1x1_d1x1_b1_in16x16_p1x1_num2_M320_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", +"EU24_k1x1_cn192_g1_s1x1_d1x1_b1_in32x32_p0x0_num2_M32_activ1_eltwise0","1 8 32 5 1 8 1 1 0 ", +"EU24_k3x3_cn32_g1_s1x1_d4x4_b1_in64x64_p4x4_num1_M32_activ2_eltwise0","1 8 32 5 1 8 1 1 0 ", "EU24_k1x1_cn512_g1_s1x1_d1x1_b1_in16x16_p0x0_num1_M2048_activ1_eltwise1","4 7 16 2 1 1 16 1 0 ", -"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192_activ1","2 7 16 2 1 1 16 1 0 ", +"EU24_k3x3_cn128_g1_s1x1_d1x1_b1_in32x32_p1x1_num1_M192_activ1_eltwise0","2 7 16 2 1 1 16 1 0 ", }; #endif diff --git a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp index d1440c53baa5742284439b8cc87907b279aec6fd..4a62546b822e9df5bbf4c4d2d3892fd281a47dfb 100644 --- a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp +++ b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp @@ -77,6 +77,7 @@ typedef enum { OCL4DNN_CONV_FUSED_ACTIV_NONE = 0, OCL4DNN_CONV_FUSED_ACTIV_RELU = 1, OCL4DNN_CONV_FUSED_ACTIV_PRELU = 2, + OCL4DNN_CONV_FUSED_ACTIV_POWER = 3 } ocl4dnnFusedActiv_t; template @@ -86,11 +87,13 @@ class OCL4DNNConvSpatial explicit OCL4DNNConvSpatial(OCL4DNNConvConfig config); ~OCL4DNNConvSpatial(); bool Forward(const UMat& bottom_data, + const UMat& bottom_data2, const UMat& weight, const UMat& bias, UMat& top_data, int32_t batch_size); void setActivReLU(bool fuse_activ, float slope); void setActivPReLU(bool fuse_activ, std::vector &slope); + void setActivPower(bool fuse_activ, float power); void setBias(bool bias_term); private: @@ -252,8 +255,8 @@ class OCL4DNNConvSpatial int lx, int ly, int lz, bool swizzle, bool nullLocal); void generateTunerItems(std::vector< cv::Ptr > &tunerItems); - void setFusionDefine(ocl4dnnFusedActiv_t fused_activ); - void setFusionArg(ocl4dnnFusedActiv_t fused_activ, ocl::Kernel &kernel, cl_uint &argIdx); + void setFusionDefine(ocl4dnnFusedActiv_t fused_activ, bool fused_eltwise); + void setFusionArg(ocl4dnnFusedActiv_t fused_activ, bool fused_eltwise, ocl::Kernel &kernel, cl_uint &argIdx); int32_t group_; bool bias_term_; @@ -305,6 +308,8 @@ class OCL4DNNConvSpatial float negative_slope_; UMat negative_slope_umat_; ocl4dnnFusedActiv_t fused_activ_; + float power_; + bool fused_eltwise_; }; typedef enum { diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp index 2ce58e32d279b64f38262b9cff1913a9d750be6a..e072ac3a0cb2006615bb7416515311b9725d190f 100644 --- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp +++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp @@ -79,6 +79,8 @@ OCL4DNNConvSpatial::OCL4DNNConvSpatial(OCL4DNNConvConfig config) group_ = config.group; fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE; + fused_eltwise_ = false; + power_ = 1.f; negative_slope_ = 0; prev_kernel_type_ = -1; tuned_ = false; @@ -141,8 +143,11 @@ OCL4DNNConvSpatial::~OCL4DNNConvSpatial() } template -void OCL4DNNConvSpatial::setFusionDefine(ocl4dnnFusedActiv_t fused_activ) +void OCL4DNNConvSpatial::setFusionDefine(ocl4dnnFusedActiv_t fused_activ, bool fused_eltwise) { + if (fused_eltwise) + addDef("FUSED_CONV_ELTWISE", 1); + switch (fused_activ) { case OCL4DNN_CONV_FUSED_ACTIV_RELU: addDef("FUSED_CONV_RELU", 1); @@ -150,6 +155,9 @@ void OCL4DNNConvSpatial::setFusionDefine(ocl4dnnFusedActiv_t fused_activ) case OCL4DNN_CONV_FUSED_ACTIV_PRELU: addDef("FUSED_CONV_PRELU", 1); break; + case OCL4DNN_CONV_FUSED_ACTIV_POWER: + addDef("FUSED_CONV_POWER", 1); + break; default: ; } @@ -157,8 +165,11 @@ void OCL4DNNConvSpatial::setFusionDefine(ocl4dnnFusedActiv_t fused_activ) } template -void OCL4DNNConvSpatial::setFusionArg(ocl4dnnFusedActiv_t fused_activ, ocl::Kernel &kernel, cl_uint &argIdx) +void OCL4DNNConvSpatial::setFusionArg(ocl4dnnFusedActiv_t fused_activ, bool fused_eltwise, ocl::Kernel &kernel, cl_uint &argIdx) { + if (fused_eltwise) + kernel.set(argIdx++, (cl_mem)bottom_data2_.handle(ACCESS_READ)); + switch (fused_activ) { case OCL4DNN_CONV_FUSED_ACTIV_RELU: kernel.set(argIdx++, (float)negative_slope_); @@ -166,6 +177,9 @@ void OCL4DNNConvSpatial::setFusionArg(ocl4dnnFusedActiv_t fused_activ, oc case OCL4DNN_CONV_FUSED_ACTIV_PRELU: kernel.set(argIdx++, (cl_mem)negative_slope_umat_.handle(ACCESS_READ)); break; + case OCL4DNN_CONV_FUSED_ACTIV_POWER: + kernel.set(argIdx++, (float)power_); + break; default: ; } @@ -255,7 +269,7 @@ void OCL4DNNConvSpatial::setupKernelDetails(int32_t kernelType, addDef("ALIGNED_NUM_FILTERS", (int)alignSize(M_, simd_size)); addDef("OUT_BLOCK_SIZE", (output_block_width*output_block_height)); addDef("APPLY_BIAS", bias_term_); - setFusionDefine(fused_activ_); + setFusionDefine(fused_activ_, fused_eltwise_); src_ = cv::ocl::dnn::conv_layer_spatial_oclsrc; } @@ -277,7 +291,7 @@ void OCL4DNNConvSpatial::setupKernelDetails(int32_t kernelType, addDef("APPLY_BIAS", bias_term_); addDef("OUTPUT_Z", M_); addDef("ZPAR", 1); - setFusionDefine(fused_activ_); + setFusionDefine(fused_activ_, fused_eltwise_); src_ = cv::ocl::dnn::conv_layer_spatial_oclsrc; } @@ -314,7 +328,7 @@ void OCL4DNNConvSpatial::setupKernelDetails(int32_t kernelType, addDef("TILE_N_LAST", M_ % 32); addDef("TILE_N_LAST_DIV8", (M_ % 32) / 8); addDef("APPLY_BIAS", bias_term_); - setFusionDefine(fused_activ_); + setFusionDefine(fused_activ_, fused_eltwise_); src_ = ocl::dnn::conv_layer_spatial_oclsrc; } } @@ -370,14 +384,37 @@ void OCL4DNNConvSpatial::setActivPReLU(bool fuse_activ, std::vector +void OCL4DNNConvSpatial::setActivPower(bool fuse_activ, float power) +{ + if ( fuse_activ ) + { + fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_POWER; + power_ = power; + } + else + fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE; +} + template bool OCL4DNNConvSpatial::Forward(const UMat& bottom, + const UMat& bottom2, const UMat& weight, const UMat& bias, UMat& top, int32_t numImages) { num_ = numImages; + if (!bottom2.empty()) + { + fused_eltwise_ = true; + bottom_data2_ = bottom2; + } + else + { + fused_eltwise_ = false; + } + prepareKernel(bottom, top, weight, bias, numImages); if (bestKernelConfig.empty()) return false; @@ -428,7 +465,8 @@ void OCL4DNNConvSpatial::generateKey() << "p" << pad_w_ << "x" << pad_h_ << "_" << "num" << num_ << "_" << "M" << M_ << "_" - << "activ" << fused_activ_; + << "activ" << fused_activ_ << "_" + << "eltwise" << fused_eltwise_; key_ = ocl::Device::getDefault().vendorName() + "_EU" + cv::format("%d", ocl::Device::getDefault().maxComputeUnits()) + "_" + keyBuilder.str(); @@ -678,7 +716,7 @@ bool OCL4DNNConvSpatial::convolve(const UMat &bottom, UMat &top, return false; cl_uint argIdx = 0; - setFusionArg(fused_activ_, kernel, argIdx); + setFusionArg(fused_activ_, fused_eltwise_, kernel, argIdx); UMat img_buffer; if (image_offset) @@ -771,7 +809,7 @@ bool OCL4DNNConvSpatial::convolve(const UMat &bottom, UMat &top, return false; cl_uint argIdx = 0; - setFusionArg(fused_activ_, kernel, argIdx); + setFusionArg(fused_activ_, fused_eltwise_, kernel, argIdx); UMat img_buffer; if (image_offset) @@ -888,7 +926,7 @@ bool OCL4DNNConvSpatial::convolve(const UMat &bottom, UMat &top, return false; cl_uint argIdx = 0; - setFusionArg(fused_activ_, kernel, argIdx); + setFusionArg(fused_activ_, fused_eltwise_, kernel, argIdx); kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom)); kernel.set(argIdx++, image_offset); kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight)); @@ -1491,6 +1529,7 @@ void OCL4DNNConvSpatial::prepareKernel(const UMat &bottom, UMat &top, if (loadCachedConfig()) // check in-memory cache return; + if (loadTunedConfig()) // check external storage return; diff --git a/modules/dnn/src/opencl/conv_layer_spatial.cl b/modules/dnn/src/opencl/conv_layer_spatial.cl index 8b892b5e7c4f7a31ba10f51305fc1692e9b682c0..c7d38117d8d469e0ef3ad18339701953c3f7668a 100644 --- a/modules/dnn/src/opencl/conv_layer_spatial.cl +++ b/modules/dnn/src/opencl/conv_layer_spatial.cl @@ -52,12 +52,21 @@ #elif defined(FUSED_CONV_PRELU) #define ACTIVATION_RELU_FUNCTION(x, c) ((Dtype)(x) > 0 ? (Dtype)(x) : ((Dtype)(x) * (Dtype)(negative_slope[c]))) #define NEGATIVE_SLOPE_ARG __global const Dtype *negative_slope, +#elif defined(FUSED_CONV_POWER) +#define ACTIVATION_RELU_FUNCTION(x, c) pow(x, power) +#define NEGATIVE_SLOPE_ARG Dtype power, #else #define ACTIVATION_RELU_FUNCTION(x, c) (x) #define NEGATIVE_SLOPE_ARG #endif +#ifdef FUSED_CONV_ELTWISE +#define ACTIVATION_FUNCTION(_dst_, _offset_, _data_, _channel_) do { (_dst_)[(_offset_)] = ACTIVATION_RELU_FUNCTION(eltwise_data[(_offset_)] + (_data_), _channel_);} while(0) +#define ELTWISE_DATA_ARG __global Dtype* eltwise_data, +#else #define ACTIVATION_FUNCTION(_dst_, _offset_, _data_, _channel_) do { (_dst_)[(_offset_)] = ACTIVATION_RELU_FUNCTION(_data_, _channel_);} while(0) +#define ELTWISE_DATA_ARG +#endif #define __CAT(x, y) x##y @@ -99,6 +108,7 @@ #ifdef KERNEL_BASIC __kernel void ConvolveBasic( + ELTWISE_DATA_ARG NEGATIVE_SLOPE_ARG __global Dtype* image_data, int image_offset, @@ -193,6 +203,7 @@ __attribute__((intel_reqd_sub_group_size(SIMD_SIZE))) #endif __kernel void convolve_simd( + ELTWISE_DATA_ARG NEGATIVE_SLOPE_ARG __global Dtype* inputs_base, filter_qualifier Dtype* weights_base, @@ -413,6 +424,7 @@ typedef struct float0 { float s0; } float0; //never used but makes compiler happ #define ROW_PITCH input_width #define GEMM_LIKE_KERNEL_ARGS \ + ELTWISE_DATA_ARG \ NEGATIVE_SLOPE_ARG \ const __global Dtype *src0, \ const __global Dtype *src1, \