From 0d00c31aae854032aaa0c12f2c218cc874c747e6 Mon Sep 17 00:00:00 2001
From: liuruilong <liuruilong@baidu.com>
Date: Thu, 12 Jul 2018 10:18:32 +0800
Subject: [PATCH] correct buffer

---
 .../paddle-mobile-demo/ViewController.swift   | 17 ++---
 .../paddle-mobile/Common/MetalExtension.swift | 21 ++++++-
 .../paddle-mobile/Executor.swift              | 29 +++++----
 .../paddle-mobile/paddle-mobile/Loader.swift  | 27 ++++----
 .../Operators/ConvAddBatchNormReluOp.swift    |  9 +++
 .../paddle-mobile/Operators/FeedOp.swift      |  4 +-
 .../Kernels/ConvAddBatchNormReluKernel.swift  |  3 +-
 .../Operators/Kernels/ConvKernel.metal        | 63 ++++++++++++++++---
 .../Operators/Kernels/Kernels.metal           | 20 ++++--
 .../paddle-mobile/Program/Attribute.swift     |  5 +-
 .../paddle-mobile/framework/Tensor.swift      | 42 ++++++-------
 .../paddle-mobile/framework/Texture.swift     |  4 +-
 test/net/test_mobilenet.cpp                   |  5 +-
 13 files changed, 164 insertions(+), 85 deletions(-)
diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift
index 7c06d16c14..612897e711 100644
--- a/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift
@@ -29,11 +29,11 @@ class ViewController: UIViewController {
 //    let queue: MTLCommandQueue
     func scaleTexture(queue: MTLCommandQueue, input: MTLTexture, complete: @escaping (MTLTexture) -> Void) {        
         let tmpTextureDes = MTLTextureDescriptor.init()
-        tmpTextureDes.width = 227
-        tmpTextureDes.height = 227
+        tmpTextureDes.width = 224
+        tmpTextureDes.height = 224
         tmpTextureDes.depth = 1
         tmpTextureDes.usage = [.shaderRead, .shaderWrite]
-        tmpTextureDes.pixelFormat = .rgba16Float
+        tmpTextureDes.pixelFormat = .rgba32Float
         tmpTextureDes.textureType = .type2D
         tmpTextureDes.storageMode = .shared
         tmpTextureDes.cpuCacheMode = .defaultCache
@@ -64,23 +64,18 @@ class ViewController: UIViewController {
         }
        
         scaleTexture(queue: queue!, input: inTexture) { (inputTexture) in
-            let loader = Loader<Float16>.init()
+            let loader = Loader<Float32>.init()
             do {
                 let modelPath = Bundle.main.path(forResource: "model", ofType: nil) ?! "model null"
                 let paraPath = Bundle.main.path(forResource: "params", ofType: nil) ?! "para null"
                 let program = try loader.load(device: self.device, modelPath: modelPath, paraPath: paraPath)
-                let executor = try Executor<Float16>.init(inDevice: self.device, inQueue: queue!, inProgram: program)
-                let output = try executor.predict(input: inputTexture, expect: [1, 227, 227, 3])
+                let executor = try Executor<Float32>.init(inDevice: self.device, inQueue: queue!, inProgram: program)
+                let output = try executor.predict(input: inputTexture, expect: [1, 224, 224, 3])
                 //            print(output)
             } catch let error {
                 print(error)
             }
         }
-        
-        
-        
-       
     }
-
 }
 
diff --git a/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift
index 0504250c40..b9380161b1 100644
--- a/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift
+++ b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift
@@ -93,7 +93,7 @@ public extension MTLTexture {
         print("texture: \(self)")
         if textureType == .type2DArray {
             for i in 0..<arrayLength{
-                var str: String = "slice: \(i): "
+                var str: String = "slice: \(i): \n"
                 let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<T>.size, alignment: MemoryLayout<T>.alignment)
                 let bytesPerRow = width * depth * 4 * MemoryLayout<T>.size
                 let bytesPerImage = width * height * depth * 4 * MemoryLayout<T>.size
@@ -142,8 +142,25 @@ public extension MTLTexture {
 }
 
 
+public extension MTLBuffer {
+    func logDesc<T>(header: String = "", stridable: Bool = true) -> T? { // Debug dump: prints `header`, the buffer, then its contents read as `T`; always returns nil (the return type only selects `T`).
+        print(header)
+        print("MTLBuffer: \(self) ")
+        var str = ""
+        if stridable && length/MemoryLayout<T>.stride > 1000{ // more than 1000 elements: print an evenly spaced sample instead of everything
+            for j in stride(from: 0, to: length/MemoryLayout<T>.stride, by: length/MemoryLayout<T>.stride / 100){ // j indexes elements, so it is bounded by the element count, not the byte length
+                str += " \(contents().assumingMemoryBound(to: T.self)[j])"
+            }
+        } else {
+            for i in 0..<length/MemoryLayout<T>.stride { // element count uses stride, consistent with the sampled branch
+                str += " \(contents().assumingMemoryBound(to: T.self)[i])"
+            }
+        }
+        print(str)
+        return nil
+    }
 
-
+}
 
 
 
diff --git a/metal/paddle-mobile/paddle-mobile/Executor.swift b/metal/paddle-mobile/paddle-mobile/Executor.swift
index 7b93b13349..7e4bb233fd 100644
--- a/metal/paddle-mobile/paddle-mobile/Executor.swift
+++ b/metal/paddle-mobile/paddle-mobile/Executor.swift
@@ -55,17 +55,8 @@ public class Executor<P: PrecisionType> {
         device = inDevice
         queue = inQueue
         for block in inProgram.programDesc.blocks {
-//            for i in 0..<2 {
-//                let op = block.ops[i]
-//                do {
-//                    let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope)
-//                    op.inferShape()
-//                    ops.append(op)
-//                } catch let error {
-//                    throw error
-//                }
-//            }
-            for op in block.ops {
+            for i in 0..<2 {
+                let op = block.ops[i]
                 do {
                     let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope)
                     op.inferShape()
@@ -74,6 +65,15 @@ public class Executor<P: PrecisionType> {
                     throw error
                 }
             }
+//            for op in block.ops {
+//                do {
+//                    let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope)
+//                    op.inferShape()
+//                    ops.append(op)
+//                } catch let error {
+//                    throw error
+//                }
+//            }
         }
     }
     
@@ -95,9 +95,9 @@ public class Executor<P: PrecisionType> {
         
         buffer.addCompletedHandler { (commandbuffer) in
             
-//            for op in self.ops {
-//                op.delogOutput()
-//            }
+            for op in self.ops {
+                op.delogOutput()
+            }
             
             let afterDate = Date.init()
             print(" encoder end ! time: \(afterDate.timeIntervalSince(beforeDate))")
@@ -114,7 +114,6 @@ public class Executor<P: PrecisionType> {
             throw PaddleMobileError.netError(message: "output var type error")
         }
         
-        
         return output
     }
     
diff --git a/metal/paddle-mobile/paddle-mobile/Loader.swift b/metal/paddle-mobile/paddle-mobile/Loader.swift
index 6e9af2930f..31fd21ebd4 100644
--- a/metal/paddle-mobile/paddle-mobile/Loader.swift
+++ b/metal/paddle-mobile/paddle-mobile/Loader.swift
@@ -50,7 +50,7 @@ public class Loader<P: PrecisionType> {
                 return pointee
             }
             
-            _ = pointerReader(type: UInt32.self)
+            let _ = pointerReader(type: UInt32.self)
             let lodLevel = pointerReader(type: UInt64.self)
             for _ in 0..<lodLevel {
                 let size = pointerReader(type: UInt64.self)
@@ -62,6 +62,7 @@ public class Loader<P: PrecisionType> {
             let _ = pointerReader(type: UInt32.self)
             
             let tensorDescSize = pointerReader(type: Int32.self)
+            
             fseek(file, Int(tensorDescSize), SEEK_CUR)
             nowIndex += Int(tensorDescSize)
             
@@ -70,21 +71,21 @@ public class Loader<P: PrecisionType> {
              */
             
             //现在模型传入模型为  Float 类型, 这块应该根据模型来
-            let tmpCapacity = MemoryLayout<Float>.size * tensor.numel()
-            let tmpPointer = UnsafeMutablePointer<Float>.allocate(capacity: tmpCapacity);
+//            let tmpCapacity = MemoryLayout<Float>.size * tensor.numel()
+//            let tmpPointer = UnsafeMutablePointer<Float>.allocate(capacity: tmpCapacity);
+            let bytesRead = fread(tensor.data.pointer, 1, tensor.data.size, file)
             
-//            let bytesRead = fread(tensor.data.pointer, 1, tensor.data.size, file)
-//            guard bytesRead == tensor.data.size else {
-//                throw PaddleMobileError.loaderError(message: "param read size error")
-//            }
+            guard bytesRead == tensor.data.size else {
+                throw PaddleMobileError.loaderError(message: "param read size error")
+            }
             
             // TODO: use script to convert
-            let bytesRead = fread(tmpPointer, 1, tmpCapacity, file)
-            for i in 0..<tensor.numel() {
-                tensor.data[i] = P.init(inFloat: tmpPointer[i])
-            }
-            tmpPointer.deinitialize(count: tmpCapacity)
-            tmpPointer.deallocate()
+//            let bytesRead = fread(tmpPointer, 1, tmpCapacity, file)
+//            for i in 0..<tensor.numel() {
+//                tensor.data[i] = P.init(inFloat: tmpPointer[i])
+//            }
+//            tmpPointer.deinitialize(count: tmpCapacity)
+//            tmpPointer.deallocate()
             
             nowIndex += bytesRead
         }
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift
index 649c5e801f..f9296afc23 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift
@@ -107,7 +107,16 @@ class ConvAddBatchNormReluOp<P: PrecisionType>: Operator<ConvAddBatchNormReluKer
     }
     
     func delogOutput() {
+        let _: P? = para.input.metalTexture.logDesc(header: "conv add batchnorm relu input: ", stridable: false) // `let _: P?` only fixes logDesc's generic element type to P; the result is discarded
+        para.filter.logDataPointer(header: "filter data pointer: ") // filter values read through the tensor's data pointer
+        print("filter: \(para.filter)")
         
+        print("biase: \(para.bias)")
+        
+        let _: P? = para.newBiase?.logDesc(header: "new biase: ", stridable: false) // optional chaining: nothing is logged when newBiase is nil
+        let _: P? = para.newScale?.logDesc(header: "new scale: ", stridable: false) // same for newScale
+        
+        let _: P? = para.output.metalTexture.logDesc(header: "conv add batchnorm relu output: ", stridable: true) // NOTE(review): stridable: true presumably samples large outputs instead of printing all of them - confirm in MTLTexture.logDesc
     }
     
 }
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift
index 1e12d8af0d..ec976f174e 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift
@@ -61,8 +61,8 @@ class FeedOp<P: PrecisionType>: Operator<Texture2DTo2DArrayKernel<P>, FeedParam<
     
     func delogOutput() {
 //        para.input.mtlTexture.logDesc()
-        let _: Float16? = para.input.mtlTexture.logDesc(header: "feed input: ")
-        let _: Float16? = para.output.metalTexture.logDesc(header: "feed output: ")
+//        let _: P? = para.input.mtlTexture.logDesc(header: "feed input: ", stridable: true)
+//        let _: P? = para.output.metalTexture.logDesc(header: "feed output: ", stridable: true)  (NOTE: both dumps are commented out, so this method currently logs nothing)
     }
 }
 
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift
index 28ac80258b..da2fa146d1 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift
@@ -29,7 +29,7 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable {
         let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self)
         
         for i in 0..<param.variance.buffer.length/MemoryLayout<P>.stride {
-            let inv = pow(Float32.init(varianceContents[i]) + param.epsilon, 0.5)
+            let inv = 1.0/pow(Float32.init(varianceContents[i]) + param.epsilon, 0.5)
             invs.append(P(inv))
         }
         
@@ -59,7 +59,6 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable {
         }
         
         print("ConvAddBatchNormReluKernel compute")
-        
         encoder.setTexture(param.input.metalTexture, index: 0)
         encoder.setTexture(param.output.metalTexture, index: 1)
         encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal
index 19fb25804a..9cb8400dc7 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal
@@ -59,13 +59,56 @@ kernel void conv3x3(texture2d_array<half, access::sample> inTexture [[texture(0)
     outTexture.write(output, gid.xy, gid.z);
 }
 
-kernel void conv_add_batch_norm_relu_3x3(texture2d_array<half, access::sample> inTexture [[texture(0)]],
-                                        texture2d_array<half, access::write> outTexture [[texture(1)]],
+//kernel void conv_add_batch_norm_relu_3x3(texture2d_array<half, access::sample> inTexture [[texture(0)]],
+//                                        texture2d_array<half, access::write> outTexture [[texture(1)]],
+//                                         constant MetalConvParam &param [[buffer(0)]],
+//                                         const device half4 *weights [[buffer(1)]],
+//                                         const device half4 *biase [[buffer(2)]],
+//                                         const device half4 *new_scale [[buffer(3)]],
+//                                         const device half4 *new_biase [[buffer(4)]],
+//                                         uint3 gid [[thread_position_in_grid]]) {
+//
+//    if (gid.x >= outTexture.get_width() ||
+//        gid.y >= outTexture.get_height() ||
+//        gid.z >= outTexture.get_array_size()) {
+//        return;
+//    }
+//
+//    short2 posInInput = short2(gid.xy) + short2(param.offsetX, param.offsetY);
+//    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+//    const uint wightSliceCount = 36;
+//    uint weithTo = gid.z * wightSliceCount * inTexture.get_array_size();
+//    half4 output = 0.0;
+//    for (uint i = 0; i < inTexture.get_array_size(); ++i) {
+//        half4 input[9];
+//        input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
+//        input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
+//        input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
+//        input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
+//        input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
+//        input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
+//        input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
+//        input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
+//        input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
+//        for (int j = 0; j < 9; ++j) {
+//            half4 weight = weights[weithTo + wightSliceCount * i + j * 4];
+//            output += dot(input[j], weight);
+//        }
+//    }
+//
+//    output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0h);
+//    outTexture.write(output, gid.xy, gid.z);
+//
+//}
+
+
+kernel void conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                                         texture2d_array<float, access::write> outTexture [[texture(1)]],
                                          constant MetalConvParam &param [[buffer(0)]],
-                                         const device half4 *weights [[buffer(1)]],
-                                         const device half4 *biase [[buffer(2)]],
-                                         const device half4 *new_scale [[buffer(3)]],
-                                         const device half4 *new_biase [[buffer(4)]],
+                                         const device float4 *weights [[buffer(1)]],
+                                         const device float4 *biase [[buffer(2)]],
+                                         const device float4 *new_scale [[buffer(3)]],
+                                         const device float4 *new_biase [[buffer(4)]],
                                          uint3 gid [[thread_position_in_grid]]) {
     
     if (gid.x >= outTexture.get_width() ||
@@ -78,9 +121,9 @@ kernel void conv_add_batch_norm_relu_3x3(texture2d_array<half, access::sample> i
     constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
     const uint wightSliceCount = 36;
     uint weithTo = gid.z * wightSliceCount * inTexture.get_array_size();
-    half4 output = 0.0;
+    float4 output = 0.0;
     for (uint i = 0; i < inTexture.get_array_size(); ++i) {
-        half4 input[9];
+        float4 input[9];
         input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
         input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
         input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
@@ -91,12 +134,12 @@ kernel void conv_add_batch_norm_relu_3x3(texture2d_array<half, access::sample> i
         input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
         input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
         for (int j = 0; j < 9; ++j) {
-            half4 weight = weights[weithTo + wightSliceCount * i + j * 4];
+            float4 weight = weights[weithTo + wightSliceCount * i + j * 4];
             output += dot(input[j], weight);
         }
     }
     
-    output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0h);
+    output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
     outTexture.write(output, gid.xy, gid.z);
     
 }
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal
index 9b202174de..155f4877fb 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal
@@ -73,16 +73,28 @@ kernel void batchnorm(texture2d_array<half, access::read> inTexture [[texture(0)
     outTexture.write(input, gid.xy, gid.z);
 }
 
-kernel void texture2d_to_2d_array(texture2d<half, access::read> inTexture [[texture(0)]],
-                               texture2d_array<half, access::write> outTexture [[texture(1)]],
-                               uint3 gid [[thread_position_in_grid]]) {
+//kernel void texture2d_to_2d_array(texture2d<half, access::read> inTexture [[texture(0)]],
+//                               texture2d_array<half, access::write> outTexture [[texture(1)]],
+//                               uint3 gid [[thread_position_in_grid]]) {
+//    if (gid.x >= inTexture.get_width() ||
+//        gid.y >= inTexture.get_height()){
+//        return;
+//    }
+//    const half4 input = inTexture.read(gid.xy);
+//    outTexture.write(input, gid.xy, 0);
+//}
+
+kernel void texture2d_to_2d_array(texture2d<float, access::read> inTexture [[texture(0)]],
+                                  texture2d_array<float, access::write> outTexture [[texture(1)]],
+                                  uint3 gid [[thread_position_in_grid]]) { // copies the texel at gid.xy of the 2D input into slice 0 of the output array; threads outside the input's width/height return early
     if (gid.x >= inTexture.get_width() ||
         gid.y >= inTexture.get_height()){
         return;
     }
+    const float4 input = inTexture.read(gid.xy); // float4 matches the float element type declared for both textures
     outTexture.write(input, gid.xy, 0);
 }
 
 
 
+
diff --git a/metal/paddle-mobile/paddle-mobile/Program/Attribute.swift b/metal/paddle-mobile/paddle-mobile/Program/Attribute.swift
index 924f92ddd5..c26fd2132e 100644
--- a/metal/paddle-mobile/paddle-mobile/Program/Attribute.swift
+++ b/metal/paddle-mobile/paddle-mobile/Program/Attribute.swift
@@ -32,6 +32,9 @@ extension Int64: Attr {
 extension Array: Attr {
 }
 
+extension String: Attr { // empty conformance so a String value can be returned where an Attr is expected
+}
+
 func attrWithProtoDesc(attrDesc: PaddleMobile_Framework_Proto_OpDesc.Attr) -> Attr {
     switch attrDesc.type {
     case .boolean:
@@ -39,7 +42,7 @@ func attrWithProtoDesc(attrDesc: PaddleMobile_Framework_Proto_OpDesc.Attr) -> At
     case .int:
         return Int(attrDesc.i)
     case .string:
-        return attrDesc.strings
+        return attrDesc.s
     case .long:
         return attrDesc.l
     case .float:
diff --git a/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift b/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift
index 50e22bf7a8..dee5d79aa9 100644
--- a/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift
+++ b/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift
@@ -38,7 +38,7 @@ class Tensor<P: PrecisionType>: Tensorial {
             pointer = inPointer
         }
         let size: Int
-        fileprivate var pointer: UnsafeMutablePointer<P>
+        var pointer: UnsafeMutablePointer<P>
         subscript(index: Int) -> P{
             get {
                 return pointer[index]
@@ -104,7 +104,7 @@ class Tensor<P: PrecisionType>: Tensorial {
                     for _ in 0..<dim[0] * dim[1] * dim[2] {
                         for j in 0..<paddedC {
                             if j < C {
-                                dstPtr?[j] = data.pointer[j]
+                                dstPtr?[j] = tmpPointer[j]
                             }
                         }
                         tmpPointer += C
@@ -134,7 +134,7 @@ class Tensor<P: PrecisionType>: Tensorial {
             for h in 0..<H{
                 for w in 0..<W{
                     for c in 0..<C{
-                        newPtr[index] = data.pointer[n * CXHXW + c * HXW + h * w + w]
+                        newPtr[index] = data.pointer[n * CXHXW + c * HXW + h * W + w]
                         index += 1
                     }
                 }
@@ -146,27 +146,25 @@ class Tensor<P: PrecisionType>: Tensorial {
 
 
 extension Tensor {
+    
     var debugDescription: String {
-        var str = ""
-        
-//        for i in 0..<buffer.length/MemoryLayout<P>.stride {
-//            str += " \(buffer.contents().assumingMemoryBound(to: P.self)[i])"
-//        }
+        var str = "dim: \(dim) \n" // the dump starts with the tensor's dim
+        str += "MTLBuffer: \(self.buffer) \n" // followed by the buffer object's own description
+        for i in 0..<buffer.length/MemoryLayout<P>.size { // then every value in the buffer read as P; the count is derived from the buffer's byte length
+            str += " \(buffer.contents().assumingMemoryBound(to: P.self)[i])"
+        }
         return str
-        
-//        var str = ""
-//        str += "Dim: \(dim) \n value:[ "
-//        if data.size < 20 {
-//            for d in 0..<data.size {
-//                str += " \(data[d]) "
-//            }
-//        } else {
-//            for d in stride(from: 0, to: data.size, by: data.size/20) {
-//                str += " \(data[d]) "
-//            }
-//        }
-//        str += " ]"
-//        return str
+    }
+    
+    func logDataPointer(header: String = "") { // Debug helper: prints `header`, then the data size, dim and every element read through `data.pointer`.
+        print(header)
+        // Assemble the whole dump first so it is emitted with a single print call.
+        var dump = "data size: \(data.size) \n" + "dim: \(dim) \n"
+        let elementCount = numel()
+        for offset in 0..<elementCount {
+            dump += " \(data.pointer[offset])"
+        }
+        print(dump)
     }
     
     var description: String {
diff --git a/metal/paddle-mobile/paddle-mobile/framework/Texture.swift b/metal/paddle-mobile/paddle-mobile/framework/Texture.swift
index fcd28c5843..9370092e1d 100644
--- a/metal/paddle-mobile/paddle-mobile/framework/Texture.swift
+++ b/metal/paddle-mobile/paddle-mobile/framework/Texture.swift
@@ -69,7 +69,7 @@ public class Texture<P: PrecisionType>: Tensorial {
         if MemoryLayout<P>.size == 1 {
             tmpTextureDes.pixelFormat = .rgba8Unorm
         } else if MemoryLayout<P>.size == 2 {
-            tmpTextureDes.pixelFormat = .rgba16Float
+            tmpTextureDes.pixelFormat = .rgba32Float
         } else if MemoryLayout<P>.size == 4 {
 //            tmpTextureDes.pixelFormat = .r32Float
             tmpTextureDes.pixelFormat = .rgba32Float
@@ -130,7 +130,7 @@ extension Texture {
     public var debugDescription: String{
         var str = ""
         str += "Dim: \(dim) \n value:[ "
-//        str += "\(metalTexture)"
+        str += "\(metalTexture)" // interpolates the Metal texture's own description between the brackets
         str += " ]"
         return str
     }
diff --git a/test/net/test_mobilenet.cpp b/test/net/test_mobilenet.cpp
index 8400b08f22..abd8dd0e97 100644
--- a/test/net/test_mobilenet.cpp
+++ b/test/net/test_mobilenet.cpp
@@ -19,7 +19,10 @@ limitations under the License. */
 int main() {
   paddle_mobile::Loader<paddle_mobile::CPU> loader;
   auto time1 = time();
-  auto program = loader.Load(g_mobilenet, true);
+//  auto program = loader.Load(g_mobilenet_combine, true);
+  auto program = loader.Load(g_mobilenet_combine + "/model",
+                               g_mobilenet_combine + "/params", true);
+
   auto time2 = time();
   DLOG << "load cost :" << time_diff(time1, time1) << "ms";
   paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, true);
-- 
GitLab