diff --git a/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift
index 01c9c6c1fc277be1ed5fa6ace6774fc7f03f2de9..11bd1b67e06e3b2d4bcdb100b7afad3848644fc9 100644
--- a/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift
+++ b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift
@@ -71,7 +71,128 @@ extension MTLDevice {
     return buffer!
   }
   
+  /// Reads every slice of `texture` back to the CPU and calls `cb([x, y, slice, lane], value)` once
+  /// per stored component, treating each texel as four values of type `P`.
+  /// One staging buffer, sized in elements rather than bytes, is reused for all slices and freed on return.
+  func texture2tensor_loop<P>(texture: MTLTexture, cb: ([Int], P)->Void) -> Void {
+    let elementsPerSlice = texture.width * texture.height * 4
+    let bpR = texture.width * 4 * MemoryLayout<P>.size
+    let bpI = texture.height * bpR
+    let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: texture.width, height: texture.height, depth: 1))
+    let pointer = UnsafeMutablePointer<P>.allocate(capacity: elementsPerSlice)
+    defer { pointer.deallocate() }
+    for i in 0..<texture.arrayLength {
+      texture.getBytes(pointer, bytesPerRow: bpR, bytesPerImage: bpI, from: region, mipmapLevel: 0, slice: i)
+      for tx in 0..<elementsPerSlice {
+        // tx == (y * width + x) * 4 + lane
+        let xyzn: [Int] = [(tx / 4) % texture.width, tx / (texture.width * 4), i, tx % 4]
+        cb(xyzn, pointer[tx])
+      }
+    }
+  }
+  
+  /// Reads a 3-D tensor back from `texture`: width and height hold ndim[3] and ndim[2], and the
+  /// ndim[1] axis is packed four lanes per slice. Returns the values flattened in `dim` order (undoing
+  /// `transpose`). Padding lanes at or beyond ndim[1] are skipped. `P` must be Float32 (the fill value is force-cast).
+  func texture2tensor_3<P>(texture: MTLTexture, dim: [Int],  transpose: [Int] = [0, 1, 2, 3]) -> [P] {
+    assert(dim.count == 3)
+    var tdim: [Int] = [1, 1, 1, 1]
+    for i in 0..<dim.count {
+      tdim[4 - dim.count + i] = dim[i]
+    }
+    let count = dim.reduce(1) { $0 * $1 }
+    var tensor: [P] = .init(repeating: Float32(0.0) as! P, count: count)
+    let ndim: [Int] = transpose.map { tdim[$0] }
+    assert(texture.width == ndim[3])
+    assert(texture.height == ndim[2])
+    assert(ndim[0] == 1)
+    assert(texture.arrayLength == (ndim[1] + 3) / 4)
+    texture2tensor_loop(texture: texture) { (xyzn: [Int], v: P) in
+      let tg: [Int] = [0, xyzn[2] * 4 + xyzn[3], xyzn[1], xyzn[0]]
+      // A padding lane has no tensor element; after the transpose its flat index could alias a real one.
+      guard tg[1] < ndim[1] else { return }
+      var ig: [Int] = [0, 0, 0, 0]
+      for k in 0..<4 {
+        ig[transpose[k]] = tg[k]
+      }
+      let ix = ig[0] * tdim[1] * tdim[2] * tdim[3] + ig[1] * tdim[2] * tdim[3] + ig[2] * tdim[3] + ig[3]
+      if ix < count { tensor[ix] = v }
+    }
+    return tensor
+  }
+  
+  /// Reads a 2-D tensor back from a single-slice `texture`: height holds ndim[2] and the ndim[3]
+  /// axis is packed four lanes per texel along the width. Returns the values flattened in `dim` order
+  /// (undoing `transpose`). Padding lanes are skipped. `P` must be Float32 (the fill value is force-cast).
+  func texture2tensor_2<P>(texture: MTLTexture, dim: [Int],  transpose: [Int] = [0, 1, 2, 3]) -> [P] {
+    assert(dim.count == 2)
+    var tdim: [Int] = [1, 1, 1, 1]
+    for i in 0..<dim.count {
+      tdim[4 - dim.count + i] = dim[i]
+    }
+    let count = dim.reduce(1) { $0 * $1 }
+    var tensor: [P] = .init(repeating: Float32(0.0) as! P, count: count)
+    let ndim: [Int] = transpose.map { tdim[$0] }
+    let w = (ndim[3] + 3) / 4
+    assert(texture.width == w)
+    assert(texture.height == ndim[2])
+    assert(ndim[0] == 1)
+    assert(ndim[1] == 1)
+    assert(texture.arrayLength == 1)
+    texture2tensor_loop(texture: texture) { (xyzn: [Int], v: P) in
+      let tg: [Int] = [0, 0, xyzn[1], xyzn[0] * 4 + xyzn[3]]
+      // A padding lane would otherwise wrap onto the first columns of the following row.
+      guard tg[3] < ndim[3] else { return }
+      var ig: [Int] = [0, 0, 0, 0]
+      for k in 0..<4 {
+        ig[transpose[k]] = tg[k]
+      }
+      let ix = ig[0] * tdim[1] * tdim[2] * tdim[3] + ig[1] * tdim[2] * tdim[3] + ig[2] * tdim[3] + ig[3]
+      if ix < count { tensor[ix] = v }
+    }
+    return tensor
+  }
+  
+  /// Reads a 1-D tensor back from a one-row, single-slice `texture` whose ndim[3] axis is packed
+  /// four lanes per texel along the width. Returns the values flattened in `dim` order (undoing
+  /// `transpose`). Padding lanes are skipped. `P` must be Float32 (the fill value is force-cast).
+  func texture2tensor_1<P>(texture: MTLTexture, dim: [Int],  transpose: [Int] = [0, 1, 2, 3]) -> [P] {
+    assert(dim.count == 1)
+    var tdim: [Int] = [1, 1, 1, 1]
+    for i in 0..<dim.count {
+      tdim[4 - dim.count + i] = dim[i]
+    }
+    let count = dim.reduce(1) { $0 * $1 }
+    var tensor: [P] = .init(repeating: Float32(0.0) as! P, count: count)
+    let ndim: [Int] = transpose.map { tdim[$0] }
+    let w = (ndim[3] + 3) / 4
+    assert(texture.width == w)
+    assert(texture.height == 1)
+    assert(ndim[0] == 1)
+    assert(ndim[1] == 1)
+    assert(ndim[2] == 1)
+    assert(texture.arrayLength == 1)
+    texture2tensor_loop(texture: texture) { (xyzn: [Int], v: P) in
+      let tg: [Int] = [0, 0, 0, xyzn[0] * 4 + xyzn[3]]
+      guard tg[3] < ndim[3] else { return }  // padding lane: no tensor element behind it
+      var ig: [Int] = [0, 0, 0, 0]
+      for k in 0..<4 {
+        ig[transpose[k]] = tg[k]
+      }
+      let ix = ig[0] * tdim[1] * tdim[2] * tdim[3] + ig[1] * tdim[2] * tdim[3] + ig[2] * tdim[3] + ig[3]
+      if ix < count { tensor[ix] = v }
+    }
+    return tensor
+  }
+  
   func texture2tensor<P>(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] {
+    if dim.count == 3 {
+      return texture2tensor_3(texture: texture, dim: dim, transpose: transpose)
+    } else if dim.count == 2 {
+      return texture2tensor_2(texture: texture, dim: dim, transpose: transpose)
+    } else if dim.count == 1 {
+      return texture2tensor_1(texture: texture, dim: dim, transpose: transpose)
+    }
     var tdim: [Int] = [1, 1, 1, 1]
     for i in 0..<dim.count {
       tdim[4 - dim.count + i] = dim[i]
@@ -84,30 +205,19 @@ extension MTLDevice {
     assert(texture.height == ndim[1])
     assert(texture.arrayLength == (ndim[0] * ndim[3] + 3) / 4)
     
-    let bpR = ndim[2] * 4 * MemoryLayout<P>.size
-    let bpI = ndim[1] * bpR
-    let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: ndim[2], height: ndim[1], depth: 1))
-    for i in 0..<texture.arrayLength {
-      let pointer: UnsafeMutablePointer<P> = UnsafeMutablePointer<P>.allocate(capacity: ndim[1] * ndim[2] * 4 * MemoryLayout<P>.size)
-      texture.getBytes(pointer, bytesPerRow: bpR, bytesPerImage: bpI, from: region, mipmapLevel: 0, slice: i)
-      
-      for h in 0..<ndim[1] {
-        for w in 0..<ndim[2] {
-          for k in 0..<4 {
-            let tx = (h * ndim[2] + w) * 4 + k
-            let n = (i * 4 + k) / ndim[3]
-            let c = (i * 4 + k) % ndim[3]
-            let jg = [n, h, w, c]
-            var ig = [0, 0, 0, 0]
-            for d in 0..<4 {
-              ig[transpose[d]] = jg[d]
-            }
-            let ix = ig[0] * tdim[1] * tdim[2] * tdim[3] + ig[1] * tdim[2] * tdim[3] + ig[2] * tdim[3] + ig[3]
-            if ix < count {
-              tensor[ix] = pointer[tx]
-            }
-          }
-        }
+    texture2tensor_loop(texture: texture) { (xyzn: [Int], v: P) in
+      var tg: [Int] = [0, 0, 0, 0]
+      tg[1] = xyzn[1]
+      tg[2] = xyzn[0]
+      tg[0] = (xyzn[2] * 4 + xyzn[3]) / ndim[3]
+      tg[3] = (xyzn[2] * 4 + xyzn[3]) % ndim[3]
+      var ig: [Int] = [0, 0, 0, 0]
+      for k in 0..<4 {
+        ig[transpose[k]] = tg[k]
+      }
+      let ix = ig[0] * tdim[1] * tdim[2] * tdim[3] + ig[1] * tdim[2] * tdim[3] + ig[2] * tdim[3] + ig[3]
+      if ix < count {
+        tensor[ix] = v
       }
     }
     return tensor
diff --git a/metal/paddle-mobile/paddle-mobile/MobilenetSSD_AR.swift b/metal/paddle-mobile/paddle-mobile/MobilenetSSD_AR.swift
index 106a1882ff9dd6edeff107f013c282346fd83977..7debb79c737e363940a735c4f74c7c9efbe58c0b 100644
--- a/metal/paddle-mobile/paddle-mobile/MobilenetSSD_AR.swift
+++ b/metal/paddle-mobile/paddle-mobile/MobilenetSSD_AR.swift
@@ -30,7 +30,9 @@ public class MobileNet_ssd_AR: Net{
   class MobilenetssdPreProccess: CusomKernel {
     init(device: MTLDevice) {
       let s = CusomKernel.Shape.init(inWidth: 160, inHeight: 160, inChannel: 3)
-      super.init(device: device, inFunctionName: "mobilent_ar_preprocess_half", outputDim: s, usePaddleMobileLib: false)
+      // Pick the shader variant that matches the global compute precision instead of hard-coding one.
+      let functionName = computePrecision == .Float16 ? "mobilent_ar_preprocess_half" : "mobilent_ar_preprocess"
+      super.init(device: device, inFunctionName: functionName, outputDim: s, usePaddleMobileLib: false)
     }
   }
   
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift
index 38563c51ddd7c416bff329f7766c58255017bb1e..cd559e79ad6bc267ca88e857fb2def9a7968dc77 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift
@@ -56,9 +56,11 @@ class BatchNormOp<P: PrecisionType>: Operator<BatchNormKernel<P>, BatchNormParam
       throw error
     }
   }
+  
+  func delogOutput() {  // debug helper: prints the op type, then the output texture read back as Float32
+    print(" \(type) output: ")
+    let output = para.output
+    let values: [Float32] = output.metalTexture!.device.texture2tensor(texture: output.metalTexture, dim: output.tensorDim.dims, transpose: output.transpose)
+    print(values.strideArray())
+  }
 }
-
-
-
-
-
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/BilinearInterpOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/BilinearInterpOp.swift
index eb5cf7d02dc085add98d977feabaf8328632ed55..f1277272a7b92aa9f27492b411fe289c00a35c8e 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/BilinearInterpOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/BilinearInterpOp.swift
@@ -53,15 +53,10 @@ class BilinearInterpOp<P: PrecisionType>: Operator<BilinearInterpKernel<P>, Bili
   
   func delogOutput() {
     print(" \(type) output: ")
-    let padToFourDim = para.output.padToFourDim
-    if para.output.transpose == [0, 1, 2, 3] {
-      let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]))
-      print(outputArray.strideArray())
-    } else if para.output.transpose == [0, 2, 3, 1] {
-      print(para.output.metalTexture.toTensor(dim: (n: padToFourDim[0], c: padToFourDim[1], h: padToFourDim[2], w: padToFourDim[3])).strideArray())
-    } else {
-      fatalError(" not implemet")
-    }
+    let device = para.output.metalTexture!.device
+    let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose)
+    // Logs the strideArray() form; swap in print(outputArray) to dump every element instead.
+    print(outputArray.strideArray())
   }
   
 }
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift
index 1abdb66aa7bdf89759a5987e3dde523c1f1dcf41..0a30b2bbefbda9694a46d4e036548a6c680224a3 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift
@@ -65,15 +65,10 @@ class ConcatOp<P: PrecisionType>: Operator<ConcatKernel<P>, ConcatParam<P>>, Run
   
   func delogOutput() {
     print(" \(type) output: ")
-    let padToFourDim = para.output.padToFourDim
-    if para.output.transpose == [0, 1, 2, 3] {
-      let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]))
-      print(outputArray.strideArray())
-    } else if para.output.transpose == [0, 2, 3, 1] {
-      print(para.output.metalTexture.toTensor(dim: (n: padToFourDim[0], c: padToFourDim[1], h: padToFourDim[2], w: padToFourDim[3])).strideArray())
-    } else {
-      fatalError(" not implemet")
-    }
+    // Read the output texture back as a flat [Float32], passing its stored transpose, and log it.
+    let device = para.output.metalTexture!.device
+    let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose)
+    print(outputArray.strideArray())
   }
   
 }
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift
index caa56ba256609f81a109f088824a0e7f9a1532b0..dad8d0c6ac2e5a93273573473c700179f8b90a37 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift
@@ -20,12 +20,13 @@ class BatchNormKernel<P: PrecisionType>: Kernel, Computable {
     let varianceP = param.variance.data.pointer
     let meanP = param.mean.data.pointer
     let scaleP = param.scale.data.pointer
-    let biasP = param.scale.data.pointer
+    let biasP = param.bias.data.pointer
     for i in 0..<count {
       let invStd = P(1 / (Float32(varianceP[i]) + param.epsilon).squareRoot())
       biasP[i] = biasP[i] - meanP[i] * invStd * scaleP[i]
       scaleP[i] = invStd * scaleP[i]
     }
+
     param.bias.initBuffer(device: device, precision: computePrecision)
     param.scale.initBuffer(device: device, precision: computePrecision)
     param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision)
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BilinearInterpKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BilinearInterpKernel.swift
index 478b1a5f807f4387ce04fde46e6d96c3cfdd06ec..7f3e7433760cc1fa4d093b08027bce7c79172532 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BilinearInterpKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BilinearInterpKernel.swift
@@ -27,10 +27,16 @@ class BilinearInterpKernel<P: PrecisionType>: Kernel, Computable{
     
     encoder.setTexture(param.input.metalTexture, index: 0)
     encoder.setTexture(param.output.metalTexture, index: 1)
-    let ratio_h: Float32 = Float32(param.input.tensorDim.dims[2]) / Float32(param.output.tensorDim.dims[2])
-    let ratio_w: Float32 = Float32(param.input.tensorDim.dims[3]) / Float32(param.output.tensorDim.dims[3])
+    var ratio_h: Float32 = 0
+    var ratio_w: Float32 = 0
+    if param.output.tensorDim.dims[2] > 1 {
+      ratio_h = Float32(param.input.tensorDim.dims[2]-1) / Float32(param.output.tensorDim.dims[2]-1)
+    }
+    if param.output.tensorDim.dims[3] > 1 {
+      ratio_w = Float32(param.input.tensorDim.dims[3]-1) / Float32(param.output.tensorDim.dims[3]-1)
+    }
     var p = BilinearInterpMetalParam.init(ratio_h: ratio_h, ratio_w: ratio_w)
-    encoder.setBytes(&p, length: MemoryLayout<ConcatMetalParam>.size, index: 0)
+    encoder.setBytes(&p, length: MemoryLayout<BilinearInterpMetalParam>.size, index: 0)
     encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
     encoder.endEncoding()
   }
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal
index 657187211eb0ed5efe2250ac85ea50d0bffdb0c1..96333a07a9669ecb2b5bfe901d71be729e37b533 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal
@@ -17,14 +17,14 @@ using namespace metal;
 
 kernel void batchnorm(texture2d_array<float, access::read> inTexture [[texture(0)]],
                       texture2d_array<float, access::write> outTexture [[texture(1)]],
-                      const device float4 * newScale [[buffer(0)]],
-                      const device float4 * newBias [[buffer(1)]],
+                      const device float4 * nscale [[buffer(0)]],  // one float4 multiplier per array slice (indexed by gid.z)
+                      const device float4 * nbias [[buffer(1)]],  // one float4 offset per array slice (indexed by gid.z)
                       uint3 gid [[thread_position_in_grid]]) {
   if (gid.x >= outTexture.get_width() ||
       gid.y >= outTexture.get_height() ||
       gid.z >= outTexture.get_array_size()) return;
   const float4 input = inTexture.read(gid.xy, gid.z);
-  float4 output = input * newScale[gid.z] + newBias[gid.z];
+  float4 output = input * nscale[gid.z] + nbias[gid.z];  // component-wise scale-and-shift for the texel at slice gid.z
   outTexture.write(output, gid.xy, gid.z);
 }
 
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.inc.metal
index cd6971bfda624a2c6b0bf9f4b51bf3e2a7c7195b..0dc877540876351ebcc5381bb151d2406772cba0 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.inc.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.inc.metal
@@ -14,8 +14,8 @@ kernel void FUNC(bilinear_interp, P)(texture2d_array<P, access::read> input [[te
   if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) {
     r = input.read(gid.xy, gid.z);
   } else {
-    float w = gid.x * pm.ratio_w;
-    float h = gid.y * pm.ratio_h;
+    P w = gid.x * pm.ratio_w;
+    P h = gid.y * pm.ratio_h;
     uint w0 = w, h0 = h;
     uint w1 = w0 + 1, h1 = h0 + 1;
     P w1lambda = w - w0, h1lambda = h - h0;
@@ -26,7 +26,8 @@ kernel void FUNC(bilinear_interp, P)(texture2d_array<P, access::read> input [[te
     VECTOR(P, 4) r1 = input.read(uint2(w1, h0), gid.z);
     VECTOR(P, 4) r2 = input.read(uint2(w0, h1), gid.z);
     VECTOR(P, 4) r3 = input.read(uint2(w1, h1), gid.z);
-    r = h2lambda * (w2lambda * r0 + w1lambda * r1) + h1lambda * (w2lambda * r2 + w1lambda * r3);
+    r = h2lambda * (w2lambda * r0 + w1lambda * r1)
+      + h1lambda * (w2lambda * r2 + w1lambda * r3);
   }
   output.write(r, gid.xy, gid.z);
 }
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal
index c4eca3e1af7565b3dbef4646b80beb5a2725c714..394cf89db09d47b0d3c87ff124c21a93962c0972 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal
@@ -16,8 +16,6 @@
 using namespace metal;
 
 struct bilinear_interp_param {
-//  int32_t out_h;
-//  int32_t out_w;
   float ratio_h;
   float ratio_w;
 };
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal
index 9858cf9c3c035364ed13bae5c131f8a4a9f199fc..40bae035c097b5ab386d78520b6b04f074eb2fee 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal
@@ -17,16 +17,16 @@ using namespace metal;
 
 
 inline void xyzn2abcd_1(int xyzn[4], int abcd[4]) {
-  abcd[0] = abcd[1] = abcd[2] = 1;
+  abcd[0] = abcd[1] = abcd[2] = 0;  // the three leading tensor indices are zeroed for the 1-D case
   abcd[3] = xyzn[0] * 4 + xyzn[3];
 }
 inline void xyzn2abcd_2(int xyzn[4], int abcd[4]) {
-  abcd[0] = abcd[1] = 1;
+  abcd[0] = abcd[1] = 0;  // the two leading tensor indices are zeroed for the 2-D case
   abcd[2] = xyzn[1];
   abcd[3] = xyzn[0] * 4 + xyzn[3];
 }
 inline void xyzn2abcd_3(int xyzn[4], int abcd[4]) {
-  abcd[0] = 1;
+  abcd[0] = 0;
   abcd[3] = xyzn[0];
   abcd[2] = xyzn[1];
   abcd[1] = xyzn[2] * 4 + xyzn[3];
@@ -40,15 +40,15 @@ inline void xyzn2abcd_4(int C, int xyzn[4], int abcd[4]) {
 }
 
 inline void abcd2xyzn_1(int abcd[4], int xyzn[4]) {
-  xyzn[1] = xyzn[2] = 1;
+  xyzn[1] = xyzn[2] = 0;  // the 1-D case uses only xyzn[0] and xyzn[3]; the other two are index 0
   xyzn[0] = abcd[3] / 4;
-  xyzn[1] = abcd[3] % 4;
+  xyzn[3] = abcd[3] % 4;  // inverse of abcd[3] = xyzn[0] * 4 + xyzn[3]; storing this in xyzn[1] left xyzn[3] unset
 }
 inline void abcd2xyzn_2(int abcd[4], int xyzn[4]) {
-  xyzn[2] = 1;
+  xyzn[2] = 0;  // the 2-D case leaves xyzn[2] at index 0
   xyzn[1] = abcd[2];
   xyzn[0] = abcd[3] / 4;
-  xyzn[1] = abcd[3] % 4;
+  xyzn[3] = abcd[3] % 4;  // inverse of abcd[3] = xyzn[0] * 4 + xyzn[3]
 }
 inline void abcd2xyzn_3(int abcd[4], int xyzn[4]) {
   xyzn[0] = abcd[3];
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal
index 0eacaf658b6c5b5e877dc58de289f93bd5873ccf..b62daaa3aa1dd7861a3b00018bc38b409b05d8e0 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal
@@ -122,20 +122,20 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[tex
   int x = gid.x - pm.offset;
   if (x < 0) return;
   if (x < pm.vdim[0]) {
-    VECTOR(P, 4)  r = in0.read(gid.xy, gid.z);
+    VECTOR(P, 4) r = in0.read(gid.xy, gid.z);
     out.write(r, gid.xy, gid.z);
     return;
   }
   x -= pm.vdim[0];
   if (x < pm.vdim[1]) {
-    VECTOR(P, 4)  r = in1.read(uint2(x, gid.y), gid.z);
+    VECTOR(P, 4) r = in1.read(uint2(x, gid.y), gid.z);
     out.write(r, gid.xy, gid.z);
     return;
   }
 #if N >= 3
   x -= pm.vdim[1];
   if (x < pm.vdim[2]) {
-    VECTOR(P, 4)  r = in2.read(uint2(x, gid.y), gid.z);
+    VECTOR(P, 4) r = in2.read(uint2(x, gid.y), gid.z);
     out.write(r, gid.xy, gid.z);
     return;
   }
@@ -143,7 +143,7 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[tex
 #if N >= 4
   x -= pm.vdim[2];
   if (x < pm.vdim[3]) {
-    VECTOR(P, 4)  r = in3.read(uint2(x, gid.y), gid.z);
+    VECTOR(P, 4) r = in3.read(uint2(x, gid.y), gid.z);
     out.write(r, gid.xy, gid.z);
     return;
   }
@@ -151,7 +151,7 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[tex
 #if N >= 5
   x -= pm.vdim[3];
   if (x < pm.vdim[4]) {
-    VECTOR(P, 4)  r = in4.read(uint2(x, gid.y), gid.z);
+    VECTOR(P, 4) r = in4.read(uint2(x, gid.y), gid.z);
     out.write(r, gid.xy, gid.z);
     return;
   }
@@ -159,7 +159,7 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[tex
 #if N >= 6
   x -= pm.vdim[4];
   if (x < pm.vdim[5]) {
-    VECTOR(P, 4)  r = in5.read(uint2(x, gid.y), gid.z);
+    VECTOR(P, 4) r = in5.read(uint2(x, gid.y), gid.z);
     out.write(r, gid.xy, gid.z);
     return;
   }
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal
index 3d6c141210ca58ed55b4f75d32640695bac55c1b..82d512e7095007b61d18158a8b9d04071b2b492b 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal
@@ -36,7 +36,6 @@ kernel void FUNC(reshape, RIN, ROUT, P)(texture2d_array<P, access::read> inTextu
     if (index < count) {
       index2abcd(lrp.idim, index, tabcd);
       trans(lrp.itrans, tabcd, iabcd);
-      abcd2xyzn(iC, iabcd, ixyzn);
 #if RIN == 4
       abcd2xyzn_4(iC, iabcd, ixyzn);
 #else
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift
index 4a27fba9839af5feb709a69a76529c60928a981e..72665c471ff0d1b1a2b7966f45afbf847438138b 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift
@@ -72,10 +72,21 @@ class PriorBoxOp<P: PrecisionType>: Operator<PriorBoxKernel<P>, PriorBoxParam<P>
 
     print(" \(type) output: ")
     // output
-    let outputArray = para.output.metalTexture.float32Array()
-    print(outputArray)
+//    let outputArray = para.output.metalTexture.float32Array()
+//    print(outputArray.strideArray())
+    let device = para.input.metalTexture!.device
+    let boxes:[Float32] = device.texture2tensor(texture: para.output.metalTexture!, dim: para.output.tensorDim.dims, transpose: [2,0,1,3])
+    let variances:[Float32] = device.texture2tensor(texture: para.outputVariances.metalTexture!, dim: para.outputVariances.tensorDim.dims, transpose: [2,0,1,3])
+    print("boxes: ")
+    print(boxes.strideArray())
+    print("variances: ")
+    print(variances.strideArray())
     // output
-//    print(" \(type) output: ")
+    print(" \(type) output: ")
+    
+    print(para.output.metalTexture.realNHWC(dim: (para.output.dim[0], para.output.dim[1], para.output.dim[2], para.output.dim[3])).strideArray())
+//    print(para.output.realNHWC().strideArray())
+    
 //    let padToFourDim = para.output.padToFourDim
 //    if para.output.transpose == [0, 1, 2, 3] {
 //      let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]), texturePrecision: computePrecision)
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift
index c9f054c88af44ac3f5dd453b4696c7988d01fa8f..ca19b720c6a3e2559668dcfafb2a6fbf853c571c 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift
@@ -47,6 +47,9 @@ class ReluOp<P: PrecisionType>: Operator<ReluKernel<P>, ReluParam<P>>, Runable,
   func delogOutput() {
     print(" \(type) output: ")
     print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
+    let device = para.output.metalTexture!.device  // second dump of the same texture, read back via texture2tensor with the stored transpose
+    let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose)
+    print(outputArray.strideArray())
   }
   
 }
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
index bd257a65f3cbbe0e4ffc866885fc13d2c0bdb909..ac46baca91bd6eedab9241da68a05d08391ec931 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
@@ -69,15 +69,9 @@ class ReshapeOp<P: PrecisionType>: Operator<ReshapeKernel<P>, ReshapeParam<P>>,
   }
   func delogOutput() {
     print("reshape delog")
-//    let _: P? = para.input.metalTexture.logDesc(header: "reshape input: ", stridable: false)
-//
-//    let _: P? = para.output.metalTexture.logDesc(header: "reshape output: ", stridable: false)
-    let padToFourDim = para.output.padToFourDim
-    
-    let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]))
-//    print(para.output.metalTexture.toTensor(dim: (n: padToFourDim[0], c: padToFourDim[1], h: padToFourDim[2], w: padToFourDim[3])).strideArray())
-
+    let device = para.output.metalTexture!.device
+    let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose)
     print(outputArray.strideArray())
-
+    // Swap in print(outputArray) to dump every element instead of the strideArray() form.
   }
 }
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift
index 41bf6784f51d648f2decfa62d586b94360bdd4be..b68d38e57340bb74b62f06db5b9e984da162dc1e 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift
@@ -64,6 +64,11 @@ class SplitOp<P: PrecisionType>: Operator<SplitKernel<P>, SplitParam<P>>, Runabl
   
   func delogOutput() {
     print(" \(type) output: ")
+    // Each entry of outputList is read back with the input texture's device and logged on its own line.
+    let readbackDevice = para.input.metalTexture!.device
+    for piece in para.outputList {
+      print((readbackDevice.texture2tensor(texture: piece.metalTexture, dim: piece.tensorDim.dims, transpose: piece.transpose) as [Float32]).strideArray())
+    }
   }
   
 }
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift
index 0213b52bf16fa498835729c5b7e3a65600f7669d..a90aa288b36bfb0eec59b9038813de0960f7aa9a 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift
@@ -57,6 +57,9 @@ class TransposeOp<P: PrecisionType>: Operator<TransposeKernel<P>, TransposeParam
     } else {
       print(" not implement")
     }
+    let device = para.output.metalTexture!.device
+    let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose)
+    print(outputArray.strideArray())
   }
 }
 
diff --git a/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift b/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift
index cd329394c6cce744d6e3c4de61ed3df6aa00b070..b43ea3742bc23d18b596d7f90935da840342dbfd 100644
--- a/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift
+++ b/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift
@@ -16,7 +16,9 @@ import Foundation
 
 class ScaleKernel: CusomKernel {
   init(device: MTLDevice, shape: Shape) {
-    super.init(device: device, inFunctionName: "scale_half", outputDim: shape, usePaddleMobileLib: false)
+    // Pick the shader variant that matches the global compute precision instead of hard-coding one.
+    let functionName = computePrecision == .Float16 ? "scale_half" : "scale"
+    super.init(device: device, inFunctionName: functionName, outputDim: shape, usePaddleMobileLib: false)
   }
 }
 
diff --git a/metal/paddle-mobile/paddle-mobile/framework/Executor.swift b/metal/paddle-mobile/paddle-mobile/framework/Executor.swift
index f76d86c4725d6a89a2add77c584cd55d77154c75..85cf80bd092c9d14c9fcb349e6ac46a6df08b162 100644
--- a/metal/paddle-mobile/paddle-mobile/framework/Executor.swift
+++ b/metal/paddle-mobile/paddle-mobile/framework/Executor.swift
@@ -14,10 +14,10 @@
 
 import Foundation
 
-let testTo = 2
+let testTo = 113  // NOTE(review): looks like a debug-only limit; its use is not visible in this hunk — confirm before merging
 var isTest = false
 
-let computePrecision: ComputePrecision = .Float16
+let computePrecision: ComputePrecision = .Float32  // module-wide precision, passed by kernels to initBuffer/initTexture
 
 public class ResultHolder {
   public let dim: [Int]
@@ -120,10 +120,10 @@ public class Executor<P: PrecisionType> {
       let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2]))
       print(inputArr.strideArray())
 
-//      print(dim)
-//      writeToLibrary(fileName: "test_image_ssd_ar", array: inputArr)
-//
-//      print("write to library done")
+      print(dim)
+      writeToLibrary(fileName: "test_image_ssd_ar", array: inputArr)
+
+      print("write to library done")
 //      return
 //                  print(inputArr)
 //