diff --git a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj index cd98098345cf526fbfbb710749112951571a4ec5..b71786f3961d0e7959af252909884531444773d6 100644 --- a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj +++ b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj @@ -7,6 +7,8 @@ objects = { /* Begin PBXBuildFile section */ + 4AF928772133F1DB005B6C3A /* BoxCoder.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF928762133F1DB005B6C3A /* BoxCoder.metal */; }; + 4AF9287921341661005B6C3A /* Softmax.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF9287821341661005B6C3A /* Softmax.metal */; }; D3831F70E7E0B565B9AC22DA /* Pods_paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */; }; FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */ = {isa = PBXBuildFile; fileRef = FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */; settings = {ATTRIBUTES = (Public, ); }; }; FC039B9720E11C9A0081E9F8 /* Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9420E11C9A0081E9F8 /* Extensions.swift */; }; @@ -85,6 +87,8 @@ /* End PBXBuildFile section */ /* Begin PBXFileReference section */ + 4AF928762133F1DB005B6C3A /* BoxCoder.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BoxCoder.metal; sourceTree = ""; }; + 4AF9287821341661005B6C3A /* Softmax.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Softmax.metal; sourceTree = ""; }; CDF58151D902A1CBAE56A0C2 /* Pods-paddle-mobile.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile/Pods-paddle-mobile.debug.xcconfig"; sourceTree = ""; }; DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; }; E2A7957C92EDA5C3BEC0FFC2 /* Pods-paddle-mobile.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile.release.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile/Pods-paddle-mobile.release.xcconfig"; sourceTree = ""; }; @@ -349,6 +353,8 @@ children = ( FC1B16B220EC9A4F00678B91 /* Kernels.metal */, FC4CB74820F0B954007C0C6D /* ConvKernel.metal */, + 4AF928762133F1DB005B6C3A /* BoxCoder.metal */, + 4AF9287821341661005B6C3A /* Softmax.metal */, FCEB6849212F00DB00D2448E /* PreluKernel.metal */, FCDDC6C9212FDF6800E5EF74 /* BatchNormKernel.metal */, FCDDC6CB212FDFDB00E5EF74 /* ReluKernel.metal */, @@ -461,6 +467,7 @@ files = ( FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */, FC039B9F20E11CB20081E9F8 /* Tensor.swift in Sources */, + 4AF9287921341661005B6C3A /* Softmax.metal in Sources */, FC0E2DBC20EE45FE009C1FAC /* ConvKernel.swift in Sources */, FC039BAA20E11CBC0081E9F8 /* ElementwiseAddOp.swift in Sources */, FCDE8A33212A917900F4A8F6 /* ConvTransposeOp.swift in Sources */, @@ -478,6 +485,7 @@ FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */, FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */, FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */, + 4AF928772133F1DB005B6C3A /* BoxCoder.metal in Sources */, FCEB684C212F093800D2448E /* PreluOp.swift in Sources */, FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */, FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */, diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SoftmaxKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SoftmaxKernel.swift index ec7d7134a713dc99a3cecf1052500f72d83f0a56..5d2d5b1c7af5d9822394d2e7de9b251085c035dc 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SoftmaxKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SoftmaxKernel.swift @@ -14,20 +14,31 @@ import Foundation +struct SoftmaxMetalParam { + let N: Int32 + let K: Int32 +} + class SoftmaxKernel: Kernel, Computable{ - - func compute(commandBuffer: MTLCommandBuffer, param: SoftmaxParam

) throws { - guard let encoder = commandBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encoder is nil") - } - encoder.setTexture(param.input.metalTexture, index: 0) - encoder.setTexture(param.output.metalTexture, index: 1) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() + + func compute(commandBuffer: MTLCommandBuffer, param: SoftmaxParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encoder is nil") } + encoder.setTexture(param.input.metalTexture, index: 0) + encoder.setTexture(param.output.metalTexture, index: 1) - required init(device: MTLDevice, param: SoftmaxParam

) { - param.output.initTexture(device: device) - super.init(device: device, inFunctionName: "softmax") - } + var smp = SoftmaxMetalParam.init( + N: Int32(param.input.tensorDim[0]), + K: Int32(param.input.tensorDim[1]) + ) + encoder.setBytes(&smp, length: MemoryLayout.size, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } + + required init(device: MTLDevice, param: SoftmaxParam

) { + param.output.initTexture(device: device) + super.init(device: device, inFunctionName: "softmax") + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal index a4e9de2bd4f12b67dc947776bf4f7e9ab01618b1..50ad15597b974f75b67bc81a2b49cae4b665bc1f 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal @@ -9,8 +9,6 @@ #include using namespace metal; - - inline void xyzn2abcd(int C, int xyzn[4], int abcd[4]) { abcd[2] = xyzn[0]; abcd[1] = xyzn[1]; diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal index c94f0551ba0068be6a319b63382980317a88b9c9..fd618e15342bbffb6ff7358e42965e734c42ddda 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal @@ -162,51 +162,6 @@ kernel void pool_half(texture2d_array inTexture [[texture(0) } -kernel void softmax(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - int zsize = inTexture.get_array_size(); - float maxv = inTexture.read(uint2(0, 0), 0)[0]; - for (int z = 0; z < zsize; z++) { - float4 r = inTexture.read(uint2(0, 0), z); - maxv = max(maxv, max(max(r[0], r[1]), max(r[2], r[3]))); - } - float sum = 0; - for (int z = 0; z < zsize; z++) { - float4 r = inTexture.read(uint2(0, 0), z); - sum += exp(r[0] - maxv) + exp(r[1] - maxv) + exp(r[2] - maxv) + exp(r[3] - maxv); - } - float4 rr = inTexture.read(gid.xy, gid.z); - rr = exp(rr - maxv) / sum; - outTexture.write(rr, gid.xy, gid.z); -} - - -kernel void softmax_half(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - int zsize = inTexture.get_array_size(); - half maxv = inTexture.read(uint2(0, 0), 0)[0]; - for (int z = 0; z < zsize; z++) { - half4 r = inTexture.read(uint2(0, 0), z); - maxv = max(maxv, max(max(r[0], r[1]), max(r[2], r[3]))); - } - float sum = 0; - for (int z = 0; z < zsize; z++) { - half4 r = inTexture.read(uint2(0, 0), z); - sum += exp(r[0] - maxv) + exp(r[1] - maxv) + exp(r[2] - maxv) + exp(r[3] - maxv); - } - half4 rr = inTexture.read(gid.xy, gid.z); - rr = exp(rr - maxv) / sum; - outTexture.write(rr, gid.xy, gid.z); -} - struct TransposeParam { diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Softmax.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Softmax.metal new file mode 100644 index 0000000000000000000000000000000000000000..ce70c9e6652f5e0be73bebba2f55877837b0b4a7 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Softmax.metal @@ -0,0 +1,81 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +using namespace metal; + +struct SoftmaxParam { + int N; + int K; +}; + +kernel void softmax(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant SoftmaxParam &sp [[buffer(0)]], + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) return; +// int zsize = inTexture.get_array_size(); + float maxv = inTexture.read(gid.xy, 0)[0]; + int group = sp.K / 4; + int remain = sp.K % 4; + for (int z = 0; z < group; z++) { + float4 r = inTexture.read(gid.xy, z); + maxv = max(maxv, max(r[0], max(r[1], max(r[2], r[3])))); + } + if (remain > 0) { + float4 r = inTexture.read(gid.xy, group); + for (int i = 0; i < remain; i++) { + maxv = max(maxv, r[i]); + } + } + float4 rsum = {0, 0, 0, 0}; + for (int z = 0; z < group; z++) { + float4 r = inTexture.read(gid.xy, z); + rsum += exp(r - maxv); + } + float sum = rsum[0] + rsum[1] + rsum[2] + rsum[3]; + if (remain > 0) { + float4 r = inTexture.read(gid.xy, group); + for (int i = 0; i < remain; i++) { + sum += exp(r[i] - maxv); + } + } + float4 rr = inTexture.read(gid.xy, gid.z); + rr = exp(rr - maxv) / sum; + outTexture.write(rr, gid.xy, gid.z); +} +// +//kernel void softmax_half(texture2d_array inTexture [[texture(0)]], +// texture2d_array outTexture [[texture(1)]], +// uint3 gid [[thread_position_in_grid]]) { +// if (gid.x >= outTexture.get_width() || +// gid.y >= outTexture.get_height() || +// gid.z >= outTexture.get_array_size()) return; +// int zsize = inTexture.get_array_size(); +// half maxv = inTexture.read(uint2(0, 0), 0)[0]; +// for (int z = 0; z < zsize; z++) { +// half4 r = inTexture.read(uint2(0, 0), z); +// maxv = max(maxv, max(max(r[0], r[1]), max(r[2], r[3]))); +// } +// float sum = 0; +// for (int z = 0; z < zsize; z++) { +// half4 r = inTexture.read(uint2(0, 0), z); +// sum += exp(r[0] - maxv) + exp(r[1] - maxv) + exp(r[2] - maxv) + exp(r[3] - maxv); +// } +// half4 rr = inTexture.read(gid.xy, gid.z); +// rr = exp(rr - maxv) / sum; +// outTexture.write(rr, gid.xy, gid.z); +//} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift index 4c4e910499435294bddfaba717b8690492649072..9bbe07422df07d255a19c86147b4ae9165643e64 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift @@ -23,6 +23,7 @@ class ReshapeParam: OpParam { shape = try ReshapeParam.getAttr(key: "shape", attrs: opDesc.attrs) var s: [Int] = shape.map { Int($0) } + var di = -1 var ml = 1 for i in 0..: OpParam { do { input = try SoftmaxParam.inputX(inputs: opDesc.inputs, from: inScope) output = try SoftmaxParam.outputOut(outputs: opDesc.outputs, from: inScope) + + assert(input.tensorDim.dims.count == 2) + assert(input.transpose == [0, 1, 2, 3]) + + output.dim = input.dim + output.tensorDim = input.tensorDim + output.originDim = input.originDim } catch let error { throw error } @@ -48,7 +55,9 @@ class SoftmaxOp: Operator, SoftmaxParam

>, } func delogOutput() { print("softmax delog") - let _: P? = para.input.metalTexture.logDesc(header: "softmax input: ", stridable: false) - let _: P? = para.output.metalTexture.logDesc(header: "softmax output: ", stridable: false) + + let originDim = para.output.originDim + let outputArray = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3])) + print(outputArray.strideArray()) } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift index 3306aaa4e9b3bcdf74879966b8e825ab893c7a41..1e3390c7e813389c4c13aab91ba146fbaea8a224 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift @@ -49,12 +49,14 @@ class TransposeOp: Operator, TransposeParam } } func delogOutput() { - print(para.input.metalTexture.toTensor(dim: (n: para.input.tensorDim[0], c: para.input.tensorDim[1], h: para.input.tensorDim[2], w: para.input.tensorDim[3])).strideArray()) + print(para.output.metalTexture.realNHWC(dim: (n: para.output.originDim[0], h: para.output.originDim[1], w: para.output.originDim[2], c: para.output.originDim[3])).strideArray()) - - let originDim = para.output.tensorDim - let outputArray = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3])) - print(outputArray.strideArray()) +// print(para.input.metalTexture.toTensor(dim: (n: para.input.originDim[0], c: para.input.originDim[1], h: para.input.originDim[2], w: para.input.originDim[3])).strideArray()) +// +// +// let originDim = para.output.tensorDim +// let outputArray = para.output.metalTexture.realNHWC(dim: (n: originDim[0], h: originDim[1], w: originDim[2], c: originDim[3])) +// print(outputArray.strideArray()) // let inputArray: [Float32] = para.input.metalTexture.floatArray { (ele: Float32) -> Float32 in