From 64e6ac2b9e3cc09a94898d9f63820421b0fc859f Mon Sep 17 00:00:00 2001 From: dolphin8 Date: Sat, 15 Sep 2018 21:22:59 +0800 Subject: [PATCH] xx --- .../paddle-mobile/Common/MetalExtension.swift | 158 +++++++++++++++--- .../paddle-mobile/MobilenetSSD_AR.swift | 2 +- .../paddle-mobile/Operators/BatchNormOp.swift | 12 +- .../Operators/BilinearInterpOp.swift | 13 +- .../paddle-mobile/Operators/ConcatOp.swift | 13 +- .../Operators/Kernels/BatchNormKernel.swift | 3 +- .../Kernels/BilinearInterpKernel.swift | 12 +- .../Kernels/metal/BatchNormKernel.metal | 6 +- .../Kernels/metal/BilinearInterp.inc.metal | 7 +- .../Kernels/metal/BilinearInterp.metal | 2 - .../Operators/Kernels/metal/Common.metal | 12 +- .../Kernels/metal/ConcatKernel.inc.metal | 12 +- .../Kernels/metal/ReshapeKernel.inc.metal | 1 - .../paddle-mobile/Operators/PriorBoxOp.swift | 17 +- .../paddle-mobile/Operators/ReluOp.swift | 3 + .../paddle-mobile/Operators/ReshapeOp.swift | 12 +- .../paddle-mobile/Operators/SplitOp.swift | 5 + .../paddle-mobile/Operators/TransposeOp.swift | 3 + .../paddle-mobile/PaddleMobile.swift | 2 +- .../paddle-mobile/framework/Executor.swift | 12 +- 20 files changed, 215 insertions(+), 92 deletions(-) diff --git a/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift index 01c9c6c1fc..11bd1b67e0 100644 --- a/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift +++ b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift @@ -71,7 +71,128 @@ extension MTLDevice { return buffer! } + func texture2tensor_loop

(texture: MTLTexture, cb: ([Int], P)->Void) -> Void { + let bpR = texture.width * 4 * MemoryLayout

.size + let bpI = texture.height * bpR + let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: texture.width, height: texture.height, depth: 1)) + for i in 0.. = UnsafeMutablePointer

.allocate(capacity: bpI) + texture.getBytes(pointer, bytesPerRow: bpR, bytesPerImage: bpI, from: region, mipmapLevel: 0, slice: i) + for tx in 0..(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] { + var tdim: [Int] = [1, 1, 1, 1] + for i in 0..(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] { + var tdim: [Int] = [1, 1, 1, 1] + for i in 0..(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] { + var tdim: [Int] = [1, 1, 1, 1] + for i in 0..(texture: MTLTexture, dim: [Int], transpose: [Int] = [0, 1, 2, 3]) -> [P] { + if dim.count == 3 { + return texture2tensor_3(texture: texture, dim: dim, transpose: transpose) + } else if dim.count == 2 { + return texture2tensor_2(texture: texture, dim: dim, transpose: transpose) + } else if dim.count == 1 { + return texture2tensor_1(texture: texture, dim: dim, transpose: transpose) + } var tdim: [Int] = [1, 1, 1, 1] for i in 0...size - let bpI = ndim[1] * bpR - let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: ndim[2], height: ndim[1], depth: 1)) - for i in 0.. = UnsafeMutablePointer

.allocate(capacity: ndim[1] * ndim[2] * 4 * MemoryLayout

.size) - texture.getBytes(pointer, bytesPerRow: bpR, bytesPerImage: bpI, from: region, mipmapLevel: 0, slice: i) - - for h in 0..: Operator, BatchNormParam throw error } } + + func delogOutput() { + print(" \(type) output: ") + let device = para.output.metalTexture!.device + let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) + print(outputArray.strideArray()) + } } - - - - - diff --git a/metal/paddle-mobile/paddle-mobile/Operators/BilinearInterpOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/BilinearInterpOp.swift index eb5cf7d02d..f1277272a7 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/BilinearInterpOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/BilinearInterpOp.swift @@ -53,15 +53,10 @@ class BilinearInterpOp: Operator, Bili func delogOutput() { print(" \(type) output: ") - let padToFourDim = para.output.padToFourDim - if para.output.transpose == [0, 1, 2, 3] { - let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3])) - print(outputArray.strideArray()) - } else if para.output.transpose == [0, 2, 3, 1] { - print(para.output.metalTexture.toTensor(dim: (n: padToFourDim[0], c: padToFourDim[1], h: padToFourDim[2], w: padToFourDim[3])).strideArray()) - } else { - fatalError(" not implemet") - } + let device = para.output.metalTexture!.device + let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) +// print(outputArray) + print(outputArray.strideArray()) } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift index 1abdb66aa7..0a30b2bbef 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift @@ -65,15 +65,10 @@ class ConcatOp: Operator, ConcatParam

>, Run func delogOutput() { print(" \(type) output: ") - let padToFourDim = para.output.padToFourDim - if para.output.transpose == [0, 1, 2, 3] { - let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3])) - print(outputArray.strideArray()) - } else if para.output.transpose == [0, 2, 3, 1] { - print(para.output.metalTexture.toTensor(dim: (n: padToFourDim[0], c: padToFourDim[1], h: padToFourDim[2], w: padToFourDim[3])).strideArray()) - } else { - fatalError(" not implemet") - } + + let device = para.output.metalTexture!.device + let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) + print(outputArray.strideArray()) } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift index caa56ba256..dad8d0c6ac 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift @@ -20,12 +20,13 @@ class BatchNormKernel: Kernel, Computable { let varianceP = param.variance.data.pointer let meanP = param.mean.data.pointer let scaleP = param.scale.data.pointer - let biasP = param.scale.data.pointer + let biasP = param.bias.data.pointer for i in 0..: Kernel, Computable{ encoder.setTexture(param.input.metalTexture, index: 0) encoder.setTexture(param.output.metalTexture, index: 1) - let ratio_h: Float32 = Float32(param.input.tensorDim.dims[2]) / Float32(param.output.tensorDim.dims[2]) - let ratio_w: Float32 = Float32(param.input.tensorDim.dims[3]) / Float32(param.output.tensorDim.dims[3]) + var ratio_h: Float32 = 0 + var ratio_w: Float32 = 0 + if param.output.tensorDim.dims[2] > 1 { + ratio_h = Float32(param.input.tensorDim.dims[2]-1) / Float32(param.output.tensorDim.dims[2]-1) + } + if param.output.tensorDim.dims[3] > 1 { + ratio_w = Float32(param.input.tensorDim.dims[3]-1) / Float32(param.output.tensorDim.dims[3]-1) + } var p = BilinearInterpMetalParam.init(ratio_h: ratio_h, ratio_w: ratio_w) - encoder.setBytes(&p, length: MemoryLayout.size, index: 0) + encoder.setBytes(&p, length: MemoryLayout.size, index: 0) encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) encoder.endEncoding() } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal index 657187211e..96333a07a9 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BatchNormKernel.metal @@ -17,14 +17,14 @@ using namespace metal; kernel void batchnorm(texture2d_array inTexture [[texture(0)]], texture2d_array outTexture [[texture(1)]], - const device float4 * newScale [[buffer(0)]], - const device float4 * newBias [[buffer(1)]], + const device float4 * nscale [[buffer(0)]], + const device float4 * nbias [[buffer(1)]], uint3 gid [[thread_position_in_grid]]) { if (gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height() || gid.z >= outTexture.get_array_size()) return; const float4 input = inTexture.read(gid.xy, gid.z); - float4 output = input * newScale[gid.z] + newBias[gid.z]; + float4 output = input * nscale[gid.z] + nbias[gid.z]; outTexture.write(output, gid.xy, gid.z); } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.inc.metal index cd6971bfda..0dc8775408 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.inc.metal +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.inc.metal @@ -14,8 +14,8 @@ kernel void FUNC(bilinear_interp, P)(texture2d_array input [[te if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) { r = input.read(gid.xy, gid.z); } else { - float w = gid.x * pm.ratio_w; - float h = gid.y * pm.ratio_h; + P w = gid.x * pm.ratio_w; + P h = gid.y * pm.ratio_h; uint w0 = w, h0 = h; uint w1 = w0 + 1, h1 = h0 + 1; P w1lambda = w - w0, h1lambda = h - h0; @@ -26,7 +26,8 @@ kernel void FUNC(bilinear_interp, P)(texture2d_array input [[te VECTOR(P, 4) r1 = input.read(uint2(w1, h0), gid.z); VECTOR(P, 4) r2 = input.read(uint2(w0, h1), gid.z); VECTOR(P, 4) r3 = input.read(uint2(w1, h1), gid.z); - r = h2lambda * (w2lambda * r0 + w1lambda * r1) + h1lambda * (w2lambda * r2 + w1lambda * r3); + r = h2lambda * (w2lambda * r0 + w1lambda * r1) + + h1lambda * (w2lambda * r2 + w1lambda * r3); } output.write(r, gid.xy, gid.z); } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal index c4eca3e1af..394cf89db0 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal @@ -16,8 +16,6 @@ using namespace metal; struct bilinear_interp_param { -// int32_t out_h; -// int32_t out_w; float ratio_h; float ratio_w; }; diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal index 9858cf9c3c..40bae035c0 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal @@ -17,16 +17,16 @@ using namespace metal; inline void xyzn2abcd_1(int xyzn[4], int abcd[4]) { - abcd[0] = abcd[1] = abcd[2] = 1; + abcd[0] = abcd[1] = abcd[2] = 0; abcd[3] = xyzn[0] * 4 + xyzn[3]; } inline void xyzn2abcd_2(int xyzn[4], int abcd[4]) { - abcd[0] = abcd[1] = 1; + abcd[0] = abcd[1] = 0; abcd[2] = xyzn[1]; abcd[3] = xyzn[0] * 4 + xyzn[3]; } inline void xyzn2abcd_3(int xyzn[4], int abcd[4]) { - abcd[0] = 1; + abcd[0] = 0; abcd[3] = xyzn[0]; abcd[2] = xyzn[1]; abcd[1] = xyzn[2] * 4 + xyzn[3]; @@ -40,15 +40,15 @@ inline void xyzn2abcd_4(int C, int xyzn[4], int abcd[4]) { } inline void abcd2xyzn_1(int abcd[4], int xyzn[4]) { - xyzn[1] = xyzn[2] = 1; + xyzn[1] = xyzn[2] = 0; xyzn[0] = abcd[3] / 4; xyzn[1] = abcd[3] % 4; } inline void abcd2xyzn_2(int abcd[4], int xyzn[4]) { - xyzn[2] = 1; + xyzn[2] = 0; xyzn[1] = abcd[2]; xyzn[0] = abcd[3] / 4; - xyzn[1] = abcd[3] % 4; + xyzn[3] = abcd[3] % 4; } inline void abcd2xyzn_3(int abcd[4], int xyzn[4]) { xyzn[0] = abcd[3]; diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal index 0eacaf658b..b62daaa3aa 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal @@ -122,20 +122,20 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array in0 [[tex int x = gid.x - pm.offset; if (x < 0) return; if (x < pm.vdim[0]) { - VECTOR(P, 4) r = in0.read(gid.xy, gid.z); + VECTOR(P, 4) r = in0.read(gid.xy, gid.z); out.write(r, gid.xy, gid.z); return; } x -= pm.vdim[0]; if (x < pm.vdim[1]) { - VECTOR(P, 4) r = in1.read(uint2(x, gid.y), gid.z); + VECTOR(P, 4) r = in1.read(uint2(x, gid.y), gid.z); out.write(r, gid.xy, gid.z); return; } #if N >= 3 x -= pm.vdim[1]; if (x < pm.vdim[2]) { - VECTOR(P, 4) r = in2.read(uint2(x, gid.y), gid.z); + VECTOR(P, 4) r = in2.read(uint2(x, gid.y), gid.z); out.write(r, gid.xy, gid.z); return; } @@ -143,7 +143,7 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array in0 [[tex #if N >= 4 x -= pm.vdim[2]; if (x < pm.vdim[3]) { - VECTOR(P, 4) r = in3.read(uint2(x, gid.y), gid.z); + VECTOR(P, 4) r = in3.read(uint2(x, gid.y), gid.z); out.write(r, gid.xy, gid.z); return; } @@ -151,7 +151,7 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array in0 [[tex #if N >= 5 x -= pm.vdim[3]; if (x < pm.vdim[4]) { - VECTOR(P, 4) r = in4.read(uint2(x, gid.y), gid.z); + VECTOR(P, 4) r = in4.read(uint2(x, gid.y), gid.z); out.write(r, gid.xy, gid.z); return; } @@ -159,7 +159,7 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array in0 [[tex #if N >= 6 x -= pm.vdim[4]; if (x < pm.vdim[5]) { - VECTOR(P, 4) r = in5.read(uint2(x, gid.y), gid.z); + VECTOR(P, 4) r = in5.read(uint2(x, gid.y), gid.z); out.write(r, gid.xy, gid.z); return; } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal index 3d6c141210..82d512e709 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal @@ -36,7 +36,6 @@ kernel void FUNC(reshape, RIN, ROUT, P)(texture2d_array inTextu if (index < count) { index2abcd(lrp.idim, index, tabcd); trans(lrp.itrans, tabcd, iabcd); - abcd2xyzn(iC, iabcd, ixyzn); #if RIN == 4 abcd2xyzn_4(iC, iabcd, ixyzn); #else diff --git a/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift index 4a27fba983..72665c471f 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift @@ -72,10 +72,21 @@ class PriorBoxOp: Operator, PriorBoxParam

print(" \(type) output: ") // output - let outputArray = para.output.metalTexture.float32Array() - print(outputArray) +// let outputArray = para.output.metalTexture.float32Array() +// print(outputArray.strideArray()) + let device = para.input.metalTexture!.device + let boxes:[Float32] = device.texture2tensor(texture: para.output.metalTexture!, dim: para.output.tensorDim.dims, transpose: [2,0,1,3]) + let variances:[Float32] = device.texture2tensor(texture: para.outputVariances.metalTexture!, dim: para.outputVariances.tensorDim.dims, transpose: [2,0,1,3]) + print("boxes: ") + print(boxes.strideArray()) + print("variances: ") + print(variances.strideArray()) // output -// print(" \(type) output: ") + print(" \(type) output: ") + + print(para.output.metalTexture.realNHWC(dim: (para.output.dim[0], para.output.dim[1], para.output.dim[2], para.output.dim[3])).strideArray()) +// print(para.output.realNHWC().strideArray()) + // let padToFourDim = para.output.padToFourDim // if para.output.transpose == [0, 1, 2, 3] { // let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]), texturePrecision: computePrecision) diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift index c9f054c88a..ca19b720c6 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift @@ -47,6 +47,9 @@ class ReluOp: Operator, ReluParam

>, Runable, func delogOutput() { print(" \(type) output: ") print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray()) + let device = para.output.metalTexture!.device + let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) + print(outputArray.strideArray()) } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift index bd257a65f3..ac46baca91 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift @@ -69,15 +69,9 @@ class ReshapeOp: Operator, ReshapeParam

>, } func delogOutput() { print("reshape delog") -// let _: P? = para.input.metalTexture.logDesc(header: "reshape input: ", stridable: false) -// -// let _: P? = para.output.metalTexture.logDesc(header: "reshape output: ", stridable: false) - let padToFourDim = para.output.padToFourDim - - let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3])) -// print(para.output.metalTexture.toTensor(dim: (n: padToFourDim[0], c: padToFourDim[1], h: padToFourDim[2], w: padToFourDim[3])).strideArray()) - + let device = para.output.metalTexture!.device + let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) print(outputArray.strideArray()) - +// print(outputArray) } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift index 41bf6784f5..b68d38e573 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift @@ -64,6 +64,11 @@ class SplitOp: Operator, SplitParam

>, Runabl func delogOutput() { print(" \(type) output: ") + let device = para.input.metalTexture!.device + for out in para.outputList { + let arr: [Float32] = device.texture2tensor(texture: out.metalTexture, dim: out.tensorDim.dims, transpose: out.transpose) + print(arr.strideArray()) + } } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift index 0213b52bf1..a90aa288b3 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift @@ -57,6 +57,9 @@ class TransposeOp: Operator, TransposeParam } else { print(" not implement") } + let device = para.output.metalTexture!.device + let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) + print(outputArray.strideArray()) } } diff --git a/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift b/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift index cd329394c6..b43ea3742b 100644 --- a/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift +++ b/metal/paddle-mobile/paddle-mobile/PaddleMobile.swift @@ -16,7 +16,7 @@ import Foundation class ScaleKernel: CusomKernel { init(device: MTLDevice, shape: Shape) { - super.init(device: device, inFunctionName: "scale_half", outputDim: shape, usePaddleMobileLib: false) + super.init(device: device, inFunctionName: "scale", outputDim: shape, usePaddleMobileLib: false) } } diff --git a/metal/paddle-mobile/paddle-mobile/framework/Executor.swift b/metal/paddle-mobile/paddle-mobile/framework/Executor.swift index f76d86c472..85cf80bd09 100644 --- a/metal/paddle-mobile/paddle-mobile/framework/Executor.swift +++ b/metal/paddle-mobile/paddle-mobile/framework/Executor.swift @@ -14,10 +14,10 @@ import Foundation -let testTo = 2 +let testTo = 113 var isTest = false -let computePrecision: ComputePrecision = .Float16 +let computePrecision: ComputePrecision = .Float32 public class ResultHolder { public let dim: [Int] @@ -120,10 +120,10 @@ public class Executor { let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2])) print(inputArr.strideArray()) -// print(dim) -// writeToLibrary(fileName: "test_image_ssd_ar", array: inputArr) -// -// print("write to library done") + print(dim) + writeToLibrary(fileName: "test_image_ssd_ar", array: inputArr) + + print("write to library done") // return // print(inputArr) // -- GitLab