diff --git a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj index b4f65506a036c4af1c9be1dc5c8498c5c2158843..1da733019f8292b8fe486ffb38a6bb8cba2fcfe0 100644 --- a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj +++ b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj @@ -9,6 +9,8 @@ /* Begin PBXBuildFile section */ 4AF928772133F1DB005B6C3A /* BoxCoder.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF928762133F1DB005B6C3A /* BoxCoder.metal */; }; 4AF9287921341661005B6C3A /* Softmax.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF9287821341661005B6C3A /* Softmax.metal */; }; + 4AF928822135673D005B6C3A /* Concat.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF928812135673D005B6C3A /* Concat.metal */; }; + 4AF9288421357BE3005B6C3A /* Elementwise.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF9288321357BE3005B6C3A /* Elementwise.metal */; }; D3831F70E7E0B565B9AC22DA /* Pods_paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */; }; FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */ = {isa = PBXBuildFile; fileRef = FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */; settings = {ATTRIBUTES = (Public, ); }; }; FC039B9720E11C9A0081E9F8 /* Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9420E11C9A0081E9F8 /* Extensions.swift */; }; @@ -90,6 +92,8 @@ /* Begin PBXFileReference section */ 4AF928762133F1DB005B6C3A /* BoxCoder.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BoxCoder.metal; sourceTree = ""; }; 4AF9287821341661005B6C3A /* Softmax.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Softmax.metal; sourceTree = ""; }; + 4AF928812135673D005B6C3A /* Concat.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Concat.metal; sourceTree = ""; }; + 4AF9288321357BE3005B6C3A /* Elementwise.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Elementwise.metal; sourceTree = ""; }; CDF58151D902A1CBAE56A0C2 /* Pods-paddle-mobile.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile/Pods-paddle-mobile.debug.xcconfig"; sourceTree = ""; }; DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; }; E2A7957C92EDA5C3BEC0FFC2 /* Pods-paddle-mobile.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile.release.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile/Pods-paddle-mobile.release.xcconfig"; sourceTree = ""; }; @@ -355,6 +359,8 @@ isa = PBXGroup; children = ( FC27990D21341016000B6BAD /* BoxCoder.metal */, + 4AF928812135673D005B6C3A /* Concat.metal */, + 4AF9288321357BE3005B6C3A /* Elementwise.metal */, FC1B16B220EC9A4F00678B91 /* Kernels.metal */, FC4CB74820F0B954007C0C6D /* ConvKernel.metal */, 4AF928762133F1DB005B6C3A /* BoxCoder.metal */, @@ -478,6 +484,7 @@ FCDE8A33212A917900F4A8F6 /* ConvTransposeOp.swift in Sources */, FCBCCC6B2123071700D94F7E /* BoxcoderOp.swift in Sources */, FC039B9B20E11CA00081E9F8 /* Executor.swift in Sources */, + 4AF9288421357BE3005B6C3A /* Elementwise.metal in Sources */, FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */, FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */, FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */, @@ -515,6 +522,7 @@ FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */, FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */, FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */, + 4AF928822135673D005B6C3A /* Concat.metal in Sources */, FCBCCC632122FCC000D94F7E /* TransposeKernel.swift in Sources */, FCBCCC71212309A700D94F7E /* MulticlassNMSKernel.swift in Sources */, FCDC0FEB21099A1D00DC9EFB /* Tools.swift in Sources */, diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift index ba9c6de773c4e86f2a2bf68fea7bfa1ec5dc5c4e..361761c7533c891c7937f36b83a4edbb3f2e5726 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ConcatOp.swift @@ -25,6 +25,13 @@ class ConcatParam: OpParam { guard let variant = inScope[x], let v = variant as? Texture

else { fatalError() } + if transpose.count == 0 { + transpose = v.transpose + } + if v.transpose != transpose { + fatalError() + } + input.append(v) } axis = try ConcatParam.getAttr(key: "axis", attrs: opDesc.attrs) @@ -35,6 +42,7 @@ class ConcatParam: OpParam { } var input: [Texture

] = [] var output: Texture

+ var transpose: [Int] = [] let axis: Int } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddOp.swift index bf17a9ee5faec80701a0e2efb33ad4765aaaf1df..8f95be21148ce5d6cccf8fb8b7e4d0ae54bf00f9 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddOp.swift @@ -18,36 +18,42 @@ class ElementwiseAddParam: OpParam { typealias ParamPrecisionType = P required init(opDesc: OpDesc, inScope: Scope) throws { do { - inputY = try ElementwiseAddParam.inputY(inputs: opDesc.paraInputs, from: inScope) - } catch _ { - do { - inputYTexture = try ElementwiseAddParam.inputX(inputs: opDesc.inputs, from: inScope) - } catch let error { - throw error - } - } - do { - input = try ElementwiseAddParam.inputX(inputs: opDesc.inputs, from: inScope) + inputX = try ElementwiseAddParam.inputX(inputs: opDesc.inputs, from: inScope) output = try ElementwiseAddParam.outputOut(outputs: opDesc.outputs, from: inScope) axis = try ElementwiseAddParam.getAttr(key: "axis", attrs: opDesc.attrs) } catch let error { throw error } + do { + inputY = try ElementwiseAddParam.inputY(inputs: opDesc.paraInputs, from: inScope) + } catch _ { + let tensorY: Tensor

= try ElementwiseAddParam.inputY(inputs: opDesc.paraInputs, from: inScope) + let device = inputX.metalTexture!.device + inputY = Texture.init(device: device, inDim: tensorY.dim) + let value: [P] = Array(UnsafeBufferPointer(start: tensorY.data.pointer, count: tensorY.dim.numel())) + inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims) + } + + var offset = axis + if axis == -1 { + offset = inputX.tensorDim.cout() - inputY.tensorDim.cout() + } + for i in 0..<(inputY.tensorDim.cout()) { + assert(inputX.tensorDim[offset + i] == inputY.tensorDim[i]) + } } - var inputYTexture: Texture

? - var inputY: Tensor

? - var input: Texture

- + var inputX: Texture

+ var inputY: Texture

var output: Texture

- let axis: Int + var axis: Int } class ElementwiseAddOp: Operator, ElementwiseAddParam

>, Runable, Creator, InferShaperable{ typealias OpType = ElementwiseAddOp

func inferShape() { - para.output.dim = para.input.dim +// para.output.dim = para.input.dim } func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws { diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConcatKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConcatKernel.swift index 9e6e0c368f4f745fc6b2e7ef8d7bef802eb20b41..60f1437e7fabf0ae088b41f37cc01e2981cbf236 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConcatKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConcatKernel.swift @@ -15,113 +15,117 @@ import Foundation struct ConcatTestParam: TestParam { - var input: [MTLTexture] - var output: MTLTexture - var dims: [[Int]] - var axis: Int - var odim: [Int] + var input: [MTLTexture] + var output: MTLTexture + var dims: [[Int]] + var axis: Int + var odim: [Int] } struct ConcatMetalParam { - var odim: (Int32, Int32, Int32, Int32) = (1, 1, 1, 1) - var axis: Int32 = 0 - var offset: Int32 = 0 - var vdim: (Int32, Int32, Int32, Int32, Int32, Int32) = (0, 0, 0, 0, 0, 0) + var odim: (Int32, Int32, Int32, Int32) = (1, 1, 1, 1) + var axis: Int32 = 0 + var offset: Int32 = 0 + var trans: (Int32, Int32, Int32, Int32) = (0, 1, 2, 3) + var vdim: (Int32, Int32, Int32, Int32, Int32, Int32) = (0, 0, 0, 0, 0, 0) } class ConcatKernel: Kernel, Computable{ - - func encodeTest(_ cmdBuffer: MTLCommandBuffer, _ param: ConcatTestParam, _ istart: Int, _ iend: Int) { - let encoder = cmdBuffer.makeComputeCommandEncoder()! - var p = ConcatMetalParam.init() - var odim: [Int32] = [1, 1, 1, 1] - for i in 0...size, index: 0) - encoder.dispatch(computePipline: pipline, outTexture: param.output) - encoder.endEncoding() + + func encodeTest(_ cmdBuffer: MTLCommandBuffer, _ param: ConcatTestParam, _ istart: Int, _ iend: Int) { + let encoder = cmdBuffer.makeComputeCommandEncoder()! + var p = ConcatMetalParam.init() + var odim: [Int32] = [1, 1, 1, 1] + for i in 0.., _ istart: Int, _ iend: Int) throws { - guard let encoder = cmdBuffer.makeComputeCommandEncoder() else { - throw PaddleMobileError.predictError(message: " encode is nil") - } - var p = ConcatMetalParam.init() - let odim = (0..<4).map { Int32(param.output.dim[$0]) } - p.odim = (odim[0], odim[1], odim[2], odim[3]) - p.axis = Int32(4 - param.output.tensorDim.cout() + param.axis) - for i in 0...size, index: 0) - encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) - encoder.endEncoding() + p.odim = (odim[0], odim[1], odim[2], odim[3]) + p.axis = Int32(4 - param.odim.count + param.axis) + for i in 0..) throws { - for i in 0.. 0 { - try self.encode(commandBuffer, param, 6 * group, param.input.count) - } + var vdim: [Int32] = [] + for i in 0..<(iend - istart) { + encoder.setTexture(param.input[i+istart], index: i) + vdim.append(Int32(param.dims[i+istart][Int(param.axis)])) } - - func test(cmdBuffer: MTLCommandBuffer, param: ConcatTestParam) { - let group = param.input.count / 6 - let remain = param.input.count % 6 - for i in 0.. 0 { - try self.encodeTest(cmdBuffer, param, 6 * group, param.input.count) - } + for i in (iend-istart)..<6 { + encoder.setTexture(param.input[0], index: i) + vdim.append(0) } - - required init(device: MTLDevice, param: ConcatParam

) { - param.output.initTexture(device: device) - super.init(device: device, inFunctionName: "concat") + p.vdim = (vdim[0], vdim[1], vdim[2], vdim[3], vdim[4], vdim[5]) + encoder.setTexture(param.output, index: 6) + encoder.setTexture(param.output, index: 7) + encoder.setBytes(&p, length: MemoryLayout.size, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.output) + encoder.endEncoding() + } + + func encode(_ cmdBuffer: MTLCommandBuffer, _ param: ConcatParam

, _ istart: Int, _ iend: Int) throws { + guard let encoder = cmdBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + var p = ConcatMetalParam.init() + let odim = (0..<4).map { Int32(param.output.dim[$0]) } + p.odim = (odim[0], odim[1], odim[2], odim[3]) + p.axis = Int32(4 - param.output.tensorDim.cout() + param.axis) + for i in 0..<4 { + if Int32(param.transpose[i]) == p.axis { + p.axis = Int32(i) + break + } + } + for i in 0...size, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() + } + + func compute(commandBuffer: MTLCommandBuffer, param: ConcatParam

) throws { - required init(device: MTLDevice, testParam: ConcatTestParam) { - super.init(device: device, inFunctionName: "concat") + let group = param.input.count / 6 + let remain = param.input.count % 6 + for i in 0.. 0 { + try self.encode(commandBuffer, param, 6 * group, param.input.count) + } + } + + func test(cmdBuffer: MTLCommandBuffer, param: ConcatTestParam) { + let group = param.input.count / 6 + let remain = param.input.count % 6 + for i in 0.. 0 { + self.encodeTest(cmdBuffer, param, 6 * group, param.input.count) } + } + + required init(device: MTLDevice, param: ConcatParam

) { + param.output.initTexture(device: device, inTranspose: param.transpose) + super.init(device: device, inFunctionName: "concat") + } + + required init(device: MTLDevice, testParam: ConcatTestParam) { + super.init(device: device, inFunctionName: "concat") + } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvTransposeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvTransposeKernel.swift index 6725cd109410a088945da71b34f6d09080e7052e..81aa9d2ff020b0273915c8ce62dbb24a3c5daa14 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvTransposeKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvTransposeKernel.swift @@ -43,6 +43,8 @@ class ConvTransposeKernel: Kernel, Computable{ let dilationY = UInt16(param.dilations[1]) metalParam = MetalConvTransposeParam.init(kernelW: kernelWidth, kernelH: kernelHeight, strideX: strideX, strideY: strideY, paddingX: paddingX, paddingY: paddingY, dilationX: dilationX, dilationY: dilationY) + + param.output.initTexture(device: device, inTranspose: param.input.transpose) } func compute(commandBuffer: MTLCommandBuffer, param: ConvTransposeParam

) throws { diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift index 2050c38c3477917ba2d568504665593f672057d0..0a1aeeb8bf70d187515683034ffc4a2f8157b281 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift @@ -14,14 +14,52 @@ import Foundation +struct ElementwiseAddMetalParam { + var fast: Int32 = 0 + var axis: Int32 = 0 + var yoff: Int32 = 0 + var xdim: (Int32, Int32, Int32, Int32) = (0, 0, 0, 0) + var xtrans: (Int32, Int32, Int32, Int32) = (0, 1, 2, 3) + var ydim: (Int32, Int32, Int32, Int32) = (0, 0, 0, 0) + var ytrans: (Int32, Int32, Int32, Int32) = (0, 1, 2, 3) +} class ElementwiseAddKernel: Kernel, Computable { required init(device: MTLDevice, param: ElementwiseAddParam

) { super.init(device: device, inFunctionName: "elementwise_add") - param.output.initTexture(device: device, inTranspose: param.input.transpose) + param.output.initTexture(device: device, inTranspose: param.inputX.transpose) } func compute(commandBuffer: MTLCommandBuffer, param: ElementwiseAddParam

) throws { + guard let encoder = commandBuffer.makeComputeCommandEncoder() else { + throw PaddleMobileError.predictError(message: " encode is nil") + } + var emp = ElementwiseAddMetalParam.init() + encoder.setTexture(param.inputX.metalTexture, index: 0) + encoder.setTexture(param.inputY.metalTexture, index: 1) + encoder.setTexture(param.output.metalTexture, index: 2) + + let xdim: [Int32] = (0..<4).map { Int32(param.inputX.dim[$0]) } + let ydim: [Int32] = (0..<4).map { Int32(param.inputY.dim[$0]) } + let xtrans: [Int32] = (0..<4).map { Int32(param.inputX.transpose[$0]) } + let ytrans: [Int32] = (0..<4).map { Int32(param.inputY.transpose[$0]) } + + emp.xdim = (xdim[0], xdim[1], xdim[2], xdim[3]) + emp.ydim = (ydim[0], ydim[1], ydim[2], ydim[3]) + emp.xtrans = (xtrans[0], xtrans[1], xtrans[2], xtrans[3]) + emp.ytrans = (ytrans[0], ytrans[1], ytrans[2], ytrans[3]) + if param.axis == -1 { + emp.axis = 4 - Int32(param.inputY.tensorDim.cout()) + } else { + emp.axis = 4 - Int32(param.inputX.tensorDim.cout()) + Int32(param.axis) + } + emp.yoff = 4 - Int32(param.inputY.tensorDim.cout()) + if (param.inputX.dim == param.inputY.dim) && (param.inputX.transpose == param.inputY.transpose) { + emp.fast = 1 + } + encoder.setBytes(&emp, length: MemoryLayout.size, index: 0) + encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture) + encoder.endEncoding() } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Concat.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Concat.metal new file mode 100644 index 0000000000000000000000000000000000000000..09c0e8dadab759bbdf514f347eff3eb005bfac2f --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Concat.metal @@ -0,0 +1,71 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include "Common.metal" + +using namespace metal; + +struct ConcatParam { + int32_t odim[4]; + int32_t axis; + int32_t offset; + int32_t trans[4]; + int32_t vdim[6]; +}; + +kernel void concat(texture2d_array in0 [[texture(0)]], + texture2d_array in1 [[texture(1)]], + texture2d_array in2 [[texture(2)]], + texture2d_array in3 [[texture(3)]], + texture2d_array in4 [[texture(4)]], + texture2d_array in5 [[texture(5)]], + texture2d_array inx [[texture(6)]], + texture2d_array out [[texture(7)]], + constant ConcatParam & pm [[buffer(0)]], + uint3 gid [[thread_position_in_grid]]) { + ConcatParam cp = pm; + int xyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0}, abcd[4], oxyzn[4]; + float4 r; + for (int i = 0; i < 4; i++) { + xyzn[3] = i; + xyzn2abcd(cp.odim[3], xyzn, abcd); + int k = abcd[cp.axis] - cp.offset; + int j = 0; + if (k < 0) { + r[i] = inx.read(gid.xy, gid.z)[i]; + } else { + for (; j < 6; j++) { + if (k < cp.vdim[j]) { + break; + } + k -= cp.vdim[j]; + } + int ta = cp.odim[cp.axis]; + abcd[cp.axis] = k; + cp.odim[cp.axis] = cp.vdim[j]; + abcd2xyzn(cp.odim[3], abcd, oxyzn); + cp.odim[cp.axis] = ta; + switch (j) { + case 0: r[i] = in0.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break; + case 1: r[i] = in1.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break; + case 2: r[i] = in2.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break; + case 3: r[i] = in3.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break; + case 4: r[i] = in4.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break; + case 5: r[i] = in5.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break; + } + } + } + out.write(r, gid.xy, gid.z); +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Elementwise.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Elementwise.metal new file mode 100644 index 0000000000000000000000000000000000000000..5ad53a8ef94adde2de6034727b49415ba2cd17c7 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Elementwise.metal @@ -0,0 +1,61 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include "Common.metal" + +using namespace metal; + +struct ElementwiseAddParam { + int32_t fast; + int32_t axis; + int32_t yoff; + int32_t xdim[4]; + int32_t xtrans[4]; + int32_t ydim[4]; + int32_t ytrans[4]; +}; + +kernel void elementwise_add(texture2d_array inputX [[texture(0)]], + texture2d_array inputY [[texture(1)]], + texture2d_array outTexture [[texture(2)]], + constant ElementwiseAddParam &pm [[buffer(0)]], + uint3 gid [[thread_position_in_grid]]) { + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) return; + float4 rx, ry; + if (pm.fast == 1) { + rx = inputX.read(gid.xy, gid.z); + ry = inputY.read(gid.xy, gid.z); + } else { + rx = inputX.read(gid.xy, gid.z); + int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4]; + int32_t y_abcd[4] = {1, 1, 1, 1}, y_xyzn[4]; + int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]}; + int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]}; + for (int n = 0; n < 4; n++) { + xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd); + invtrans(xtrans, x_abcd, t_abcd); + for (int k = pm.axis; k < (4 - pm.yoff); k++) { + y_abcd[k+pm.yoff] = t_abcd[k]; + } + trans(ytrans, y_abcd, t_abcd); + abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn); + ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]]; + } + } + float4 r = rx + ry; + outTexture.write(r, gid.xy, gid.z); +} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal index 1e135e609b7000db72768491641cbf178bed074b..a45063b3c09873b7c41f7540d1d33df2b1559e54 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Kernels.metal @@ -43,17 +43,6 @@ kernel void resize(texture2d inTexture [[texture(0)]], outTexture.write(half4(input.x, input.y, input.z, input.w), gid.xy, gid.z); } -kernel void elementwise_add(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - const device half4 *biasTerms [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) return; - constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero); - const half4 input = inTexture.read(gid.xy, gid.z); - outTexture.write(input, gid.xy, gid.z); -} //kernel void texture2d_to_2d_array(texture2d inTexture [[texture(0)]], @@ -200,55 +189,3 @@ kernel void transpose(texture2d_array inTexture [[texture(0 outTexture.write(r, gid.xy, gid.z); } } - -struct ConcatParam { - int32_t odim[4]; - int32_t axis; - int32_t offset; - int32_t vdim[6]; -}; - -kernel void concat(texture2d_array in0 [[texture(0)]], - texture2d_array in1 [[texture(1)]], - texture2d_array in2 [[texture(2)]], - texture2d_array in3 [[texture(3)]], - texture2d_array in4 [[texture(4)]], - texture2d_array in5 [[texture(5)]], - texture2d_array inx [[texture(6)]], - texture2d_array out [[texture(7)]], - constant ConcatParam & pm [[buffer(0)]], - uint3 gid [[thread_position_in_grid]]) { - ConcatParam cp = pm; - int xyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0}, abcd[4], oxyzn[4]; - float4 r; - for (int i = 0; i < 4; i++) { - xyzn[3] = i; - xyzn2abcd(cp.odim[3], xyzn, abcd); - int k = abcd[cp.axis] - cp.offset; - int j = 0; - if (k < 0) { - r[i] = inx.read(gid.xy, gid.z)[i]; - } else { - for (; j < 6; j++) { - if (k < cp.vdim[j]) { - break; - } - k -= cp.vdim[j]; - } - int ta = cp.odim[cp.axis]; - abcd[cp.axis] = k; - cp.odim[cp.axis] = cp.vdim[j]; - abcd2xyzn(cp.odim[3], abcd, oxyzn); - cp.odim[cp.axis] = ta; - switch (j) { - case 0: r[i] = in0.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break; - case 1: r[i] = in1.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break; - case 2: r[i] = in2.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break; - case 3: r[i] = in3.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break; - case 4: r[i] = in4.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break; - case 5: r[i] = in5.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break; - } - } - } - out.write(r, gid.xy, gid.z); -} diff --git a/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift index d2ecaccba2c2c0630dd99cc2d46567e1209c16a3..6f42f2aa9f8d0515946ace625ed16c5040fd3099 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift @@ -26,6 +26,7 @@ class PoolParam: OpParam { padding = try PoolParam.getAttr(key: "paddings", attrs: opDesc.attrs) ceilMode = try PoolParam.getAttr(key: "ceil_mode", attrs: opDesc.attrs) globalPooling = try PoolParam.getAttr(key: "global_pooling", attrs: opDesc.attrs) + assert(input.transpose == [0, 2, 3, 1]) } catch let error { throw error }