From 0d00c31aae854032aaa0c12f2c218cc874c747e6 Mon Sep 17 00:00:00 2001 From: liuruilong Date: Thu, 12 Jul 2018 10:18:32 +0800 Subject: [PATCH] correct buffer --- .../paddle-mobile-demo/ViewController.swift | 17 ++--- .../paddle-mobile/Common/MetalExtension.swift | 21 ++++++- .../paddle-mobile/Executor.swift | 29 +++++---- .../paddle-mobile/paddle-mobile/Loader.swift | 27 ++++---- .../Operators/ConvAddBatchNormReluOp.swift | 9 +++ .../paddle-mobile/Operators/FeedOp.swift | 4 +- .../Kernels/ConvAddBatchNormReluKernel.swift | 3 +- .../Operators/Kernels/ConvKernel.metal | 63 ++++++++++++++++--- .../Operators/Kernels/Kernels.metal | 20 ++++-- .../paddle-mobile/Program/Attribute.swift | 5 +- .../paddle-mobile/framework/Tensor.swift | 42 ++++++------- .../paddle-mobile/framework/Texture.swift | 4 +- test/net/test_mobilenet.cpp | 5 +- 13 files changed, 164 insertions(+), 85 deletions(-) diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift index 7c06d16c14..612897e711 100644 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift @@ -29,11 +29,11 @@ class ViewController: UIViewController { // let queue: MTLCommandQueue func scaleTexture(queue: MTLCommandQueue, input: MTLTexture, complete: @escaping (MTLTexture) -> Void) { let tmpTextureDes = MTLTextureDescriptor.init() - tmpTextureDes.width = 227 - tmpTextureDes.height = 227 + tmpTextureDes.width = 224 + tmpTextureDes.height = 224 tmpTextureDes.depth = 1 tmpTextureDes.usage = [.shaderRead, .shaderWrite] - tmpTextureDes.pixelFormat = .rgba16Float + tmpTextureDes.pixelFormat = .rgba32Float tmpTextureDes.textureType = .type2D tmpTextureDes.storageMode = .shared tmpTextureDes.cpuCacheMode = .defaultCache @@ -64,23 +64,18 @@ class ViewController: UIViewController { } scaleTexture(queue: queue!, input: inTexture) { (inputTexture) in - let loader = 
Loader.init() + let loader = Loader.init() do { let modelPath = Bundle.main.path(forResource: "model", ofType: nil) ?! "model null" let paraPath = Bundle.main.path(forResource: "params", ofType: nil) ?! "para null" let program = try loader.load(device: self.device, modelPath: modelPath, paraPath: paraPath) - let executor = try Executor.init(inDevice: self.device, inQueue: queue!, inProgram: program) - let output = try executor.predict(input: inputTexture, expect: [1, 227, 227, 3]) + let executor = try Executor.init(inDevice: self.device, inQueue: queue!, inProgram: program) + let output = try executor.predict(input: inputTexture, expect: [1, 224, 224, 3]) // print(output) } catch let error { print(error) } } - - - - } - } diff --git a/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift index 0504250c40..b9380161b1 100644 --- a/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift +++ b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift @@ -93,7 +93,7 @@ public extension MTLTexture { print("texture: \(self)") if textureType == .type2DArray { for i in 0...size, alignment: MemoryLayout.alignment) let bytesPerRow = width * depth * 4 * MemoryLayout.size let bytesPerImage = width * height * depth * 4 * MemoryLayout.size @@ -142,8 +142,25 @@ public extension MTLTexture { } +public extension MTLBuffer { + func logDesc(header: String = "", stridable: Bool = true) -> T? 
{ + print(header) + print("MTLBuffer: \(self) ") + var str = "" + if stridable && length/MemoryLayout.stride > 1000{ + for j in stride(from: 0, to: length, by: length/MemoryLayout.stride / 100){ + str += " \(contents().assumingMemoryBound(to: T.self)[j])" + } + } else { + for i in 0...size { + str += " \(contents().assumingMemoryBound(to: T.self)[i])" + } + } + print(str) + return nil +} - +} diff --git a/metal/paddle-mobile/paddle-mobile/Executor.swift b/metal/paddle-mobile/paddle-mobile/Executor.swift index 7b93b13349..7e4bb233fd 100644 --- a/metal/paddle-mobile/paddle-mobile/Executor.swift +++ b/metal/paddle-mobile/paddle-mobile/Executor.swift @@ -55,17 +55,8 @@ public class Executor { device = inDevice queue = inQueue for block in inProgram.programDesc.blocks { -// for i in 0..<2 { -// let op = block.ops[i] -// do { -// let op = try OpCreator

.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope) -// op.inferShape() -// ops.append(op) -// } catch let error { -// throw error -// } -// } - for op in block.ops { + for i in 0..<2 { + let op = block.ops[i] do { let op = try OpCreator

.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope) op.inferShape() @@ -74,6 +65,15 @@ public class Executor { throw error } } +// for op in block.ops { +// do { +// let op = try OpCreator

.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope) +// op.inferShape() +// ops.append(op) +// } catch let error { +// throw error +// } +// } } } @@ -95,9 +95,9 @@ public class Executor { buffer.addCompletedHandler { (commandbuffer) in -// for op in self.ops { -// op.delogOutput() -// } + for op in self.ops { + op.delogOutput() + } let afterDate = Date.init() print(" encoder end ! time: \(afterDate.timeIntervalSince(beforeDate))") @@ -114,7 +114,6 @@ public class Executor { throw PaddleMobileError.netError(message: "output var type error") } - return output } diff --git a/metal/paddle-mobile/paddle-mobile/Loader.swift b/metal/paddle-mobile/paddle-mobile/Loader.swift index 6e9af2930f..31fd21ebd4 100644 --- a/metal/paddle-mobile/paddle-mobile/Loader.swift +++ b/metal/paddle-mobile/paddle-mobile/Loader.swift @@ -50,7 +50,7 @@ public class Loader { return pointee } - _ = pointerReader(type: UInt32.self) + let _ = pointerReader(type: UInt32.self) let lodLevel = pointerReader(type: UInt64.self) for _ in 0.. 
{ let _ = pointerReader(type: UInt32.self) let tensorDescSize = pointerReader(type: Int32.self) + fseek(file, Int(tensorDescSize), SEEK_CUR) nowIndex += Int(tensorDescSize) @@ -70,21 +71,21 @@ public class Loader { */ //现在模型传入模型为 Float 类型, 这块应该根据模型来 - let tmpCapacity = MemoryLayout.size * tensor.numel() - let tmpPointer = UnsafeMutablePointer.allocate(capacity: tmpCapacity); +// let tmpCapacity = MemoryLayout.size * tensor.numel() +// let tmpPointer = UnsafeMutablePointer.allocate(capacity: tmpCapacity); + let bytesRead = fread(tensor.data.pointer, 1, tensor.data.size, file) -// let bytesRead = fread(tensor.data.pointer, 1, tensor.data.size, file) -// guard bytesRead == tensor.data.size else { -// throw PaddleMobileError.loaderError(message: "param read size error") -// } + guard bytesRead == tensor.data.size else { + throw PaddleMobileError.loaderError(message: "param read size error") + } // TODO: use script to convert - let bytesRead = fread(tmpPointer, 1, tmpCapacity, file) - for i in 0..: Operator: Operator, FeedParam< func delogOutput() { // para.input.mtlTexture.logDesc() - let _: Float16? = para.input.mtlTexture.logDesc(header: "feed input: ") - let _: Float16? = para.output.metalTexture.logDesc(header: "feed output: ") +// let _: P? = para.input.mtlTexture.logDesc(header: "feed input: ", stridable: true) +// let _: P? 
= para.output.metalTexture.logDesc(header: "feed output: ", stridable: true) } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift index 28ac80258b..da2fa146d1 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift @@ -29,7 +29,7 @@ class ConvAddBatchNormReluKernel: Kernel, Computable { let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self) for i in 0...stride { - let inv = pow(Float32.init(varianceContents[i]) + param.epsilon, 0.5) + let inv = 1.0/pow(Float32.init(varianceContents[i]) + param.epsilon, 0.5) invs.append(P(inv)) } @@ -59,7 +59,6 @@ class ConvAddBatchNormReluKernel: Kernel, Computable { } print("ConvAddBatchNormReluKernel compute") - encoder.setTexture(param.input.metalTexture, index: 0) encoder.setTexture(param.output.metalTexture, index: 1) encoder.setBytes(&metalParam, length: MemoryLayout.size, index: 0) diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal index 19fb25804a..9cb8400dc7 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal @@ -59,13 +59,56 @@ kernel void conv3x3(texture2d_array inTexture [[texture(0) outTexture.write(output, gid.xy, gid.z); } -kernel void conv_add_batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], +//kernel void conv_add_batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], +// texture2d_array outTexture [[texture(1)]], +// constant MetalConvParam &param [[buffer(0)]], +// const device half4 *weights [[buffer(1)]], +// const device half4 *biase [[buffer(2)]], 
+// const device half4 *new_scale [[buffer(3)]], +// const device half4 *new_biase [[buffer(4)]], +// uint3 gid [[thread_position_in_grid]]) { +// +// if (gid.x >= outTexture.get_width() || +// gid.y >= outTexture.get_height() || +// gid.z >= outTexture.get_array_size()) { +// return; +// } +// +// short2 posInInput = short2(gid.xy) + short2(param.offsetX, param.offsetY); +// constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); +// const uint wightSliceCount = 36; +// uint weithTo = gid.z * wightSliceCount * inTexture.get_array_size(); +// half4 output = 0.0; +// for (uint i = 0; i < inTexture.get_array_size(); ++i) { +// half4 input[9]; +// input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i); +// input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i); +// input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i); +// input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i); +// input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i); +// input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i); +// input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i); +// input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i); +// input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i); +// for (int j = 0; j < 9; ++j) { +// half4 weight = weights[weithTo + wightSliceCount * i + j * 4]; +// output += dot(input[j], weight); +// } +// } +// +// output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0h); +// outTexture.write(output, gid.xy, gid.z); +// +//} + + +kernel void conv_add_batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], constant MetalConvParam &param [[buffer(0)]], - const device half4 *weights [[buffer(1)]], - const device half4 *biase 
[[buffer(2)]], - const device half4 *new_scale [[buffer(3)]], - const device half4 *new_biase [[buffer(4)]], + const device float4 *weights [[buffer(1)]], + const device float4 *biase [[buffer(2)]], + const device float4 *new_scale [[buffer(3)]], + const device float4 *new_biase [[buffer(4)]], uint3 gid [[thread_position_in_grid]]) { if (gid.x >= outTexture.get_width() || @@ -78,9 +121,9 @@ kernel void conv_add_batch_norm_relu_3x3(texture2d_array i constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); const uint wightSliceCount = 36; uint weithTo = gid.z * wightSliceCount * inTexture.get_array_size(); - half4 output = 0.0; + float4 output = 0.0; for (uint i = 0; i < inTexture.get_array_size(); ++i) { - half4 input[9]; + float4 input[9]; input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i); input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i); input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i); @@ -91,12 +134,12 @@ kernel void conv_add_batch_norm_relu_3x3(texture2d_array i input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i); input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i); for (int j = 0; j < 9; ++j) { - half4 weight = weights[weithTo + wightSliceCount * i + j * 4]; + float4 weight = weights[weithTo + wightSliceCount * i + j * 4]; output += dot(input[j], weight); } } - output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0h); + output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0); outTexture.write(output, gid.xy, gid.z); } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal index 9b202174de..155f4877fb 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal +++ 
b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal @@ -73,16 +73,28 @@ kernel void batchnorm(texture2d_array inTexture [[texture(0) outTexture.write(input, gid.xy, gid.z); } -kernel void texture2d_to_2d_array(texture2d inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - uint3 gid [[thread_position_in_grid]]) { +//kernel void texture2d_to_2d_array(texture2d inTexture [[texture(0)]], +// texture2d_array outTexture [[texture(1)]], +// uint3 gid [[thread_position_in_grid]]) { +// if (gid.x >= inTexture.get_width() || +// gid.y >= inTexture.get_height()){ +// return; +// } +// const half4 input = inTexture.read(gid.xy); +// outTexture.write(input, gid.xy, 0); +//} + +kernel void texture2d_to_2d_array(texture2d inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + uint3 gid [[thread_position_in_grid]]) { if (gid.x >= inTexture.get_width() || gid.y >= inTexture.get_height()){ return; } - const half4 input = inTexture.read(gid.xy); + const float4 input = inTexture.read(gid.xy); outTexture.write(input, gid.xy, 0); } + diff --git a/metal/paddle-mobile/paddle-mobile/Program/Attribute.swift b/metal/paddle-mobile/paddle-mobile/Program/Attribute.swift index 924f92ddd5..c26fd2132e 100644 --- a/metal/paddle-mobile/paddle-mobile/Program/Attribute.swift +++ b/metal/paddle-mobile/paddle-mobile/Program/Attribute.swift @@ -32,6 +32,9 @@ extension Int64: Attr { extension Array: Attr { } +extension String: Attr { +} + func attrWithProtoDesc(attrDesc: PaddleMobile_Framework_Proto_OpDesc.Attr) -> Attr { switch attrDesc.type { case .boolean: @@ -39,7 +42,7 @@ func attrWithProtoDesc(attrDesc: PaddleMobile_Framework_Proto_OpDesc.Attr) -> At case .int: return Int(attrDesc.i) case .string: - return attrDesc.strings + return attrDesc.s case .long: return attrDesc.l case .float: diff --git a/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift b/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift index 50e22bf7a8..dee5d79aa9 100644 
--- a/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift +++ b/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift @@ -38,7 +38,7 @@ class Tensor: Tensorial { pointer = inPointer } let size: Int - fileprivate var pointer: UnsafeMutablePointer<P> + var pointer: UnsafeMutablePointer<P>
subscript(index: Int) -> P{ get { return pointer[index] @@ -104,7 +104,7 @@ class Tensor: Tensorial { for _ in 0..: Tensorial { for h in 0..: Tensorial { extension Tensor { + var debugDescription: String { - var str = "" - -// for i in 0...stride { -// str += " \(buffer.contents().assumingMemoryBound(to: P.self)[i])" -// } + var str = "dim: \(dim) \n" + str += "MTLBuffer: \(self.buffer) \n" + for i in 0...size { + str += " \(buffer.contents().assumingMemoryBound(to: P.self)[i])" + } return str - -// var str = "" -// str += "Dim: \(dim) \n value:[ " -// if data.size < 20 { -// for d in 0..: Tensorial { if MemoryLayout

.size == 1 { tmpTextureDes.pixelFormat = .rgba8Unorm } else if MemoryLayout

.size == 2 { - tmpTextureDes.pixelFormat = .rgba16Float + tmpTextureDes.pixelFormat = .rgba32Float } else if MemoryLayout

.size == 4 { // tmpTextureDes.pixelFormat = .r32Float tmpTextureDes.pixelFormat = .rgba32Float @@ -130,7 +130,7 @@ extension Texture { public var debugDescription: String{ var str = "" str += "Dim: \(dim) \n value:[ " -// str += "\(metalTexture)" + str += "\(metalTexture)" str += " ]" return str } diff --git a/test/net/test_mobilenet.cpp b/test/net/test_mobilenet.cpp index 8400b08f22..abd8dd0e97 100644 --- a/test/net/test_mobilenet.cpp +++ b/test/net/test_mobilenet.cpp @@ -19,7 +19,10 @@ limitations under the License. */ int main() { paddle_mobile::Loader loader; auto time1 = time(); - auto program = loader.Load(g_mobilenet, true); +// auto program = loader.Load(g_mobilenet_combine, true); + auto program = loader.Load(g_mobilenet_combine + "/model", + g_mobilenet_combine + "/params", true); + auto time2 = time(); DLOG << "load cost :" << time_diff(time1, time1) << "ms"; paddle_mobile::Executor executor(program, 1, true); -- GitLab