diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PreProcessKernel.metal b/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PreProcessKernel.metal index 98121381de27c18daac71264155bd2c5c1abd11a..75d9dc618ddb9c8d8da1d33f87c1598d78c1edf0 100644 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PreProcessKernel.metal +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Net/PreProcessKernel.metal @@ -80,7 +80,7 @@ kernel void genet_preprocess(texture2d inTexture [[texture( gid.y >= outTexture.get_height()) { return; } - const auto means = float4(123.68f, 116.78f, 103.94f, 0.0f); + const auto means = float4(128.0f, 128.0f, 128.0f, 0.0f); const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid); } diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift index a39bf44d1a0318305f52d96e7e21a4ba1e54ac37..1674546fe5e0a7cc799bae92c8e2586e6e4b704e 100644 --- a/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift +++ b/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift @@ -79,7 +79,7 @@ class ViewController: UIViewController { return } do { - let max = 50 + let max = 1 let startDate = Date.init() for i in 0.. { queue = inQueue for block in inProgram.programDesc.blocks { //block.ops.count - for i in 0...shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope) @@ -110,13 +110,12 @@ public class Executor { } buffer.addCompletedHandler { (commandbuffer) in -// return; // let inputArr = resInput.floatArray(res: { (p:P) -> P in // return p // }) - -// writeToLibrary(fileName: "input_hand", array: inputArr) +// +// writeToLibrary(fileName: "genet_input_hand", array: inputArr) // print("write to library done") // return // print(inputArr) @@ -125,10 +124,13 @@ public class Executor { // print(stridableInput) // let _: Flo? = input.logDesc(header: "input: ", stridable: true) -// for op in self.ops { -// op.delogOutput() -// } -// return + for i in 0..: Operator, ConvAddParam

>, } func delogOutput() { + print(" \(type) output: ") print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray()) } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvKernel.metal index 15f6704a90a7e15e8140ae1f6fb4ddc0eae9286a..06c93da59d383e987386eb67073fbea7e99b4e49 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvKernel.metal +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvKernel.metal @@ -356,78 +356,6 @@ kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array inTexture [[texture(0)]], - texture2d_array outTexture [[texture(1)]], - constant MetalConvTransposeParam ¶m [[buffer(0)]], - const device float4 *weights [[buffer(1)]], - uint3 gid [[thread_position_in_grid]]){ - if (gid.x >= outTexture.get_width() || - gid.y >= outTexture.get_height() || - gid.z >= outTexture.get_array_size()) { - return; - } - - int input_array_size = inTexture.get_array_size(); - - uint kernel_one_output_slice = input_array_size * param.kernelW * param.kernelH; - - uint kernel_stride_z = gid.z * 4 * (kernel_one_output_slice); - - constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); - - float4 output; - - for (int w = 0; w < param.kernelW; ++w) { - int input_x = (gid.x - w * param.dilationX + param.paddingX) / param.strideX; - if (input_x < 0 || input_x >= int(inTexture.get_width())) { - continue; - } - - for (int h = 0; h < param.kernelH; ++h) { - int input_y = (gid.y - h * param.dilationY + param.paddingY) / param.strideY; - if (input_y < 0 || input_y >= int(inTexture.get_height())) { - continue; - } - - uint kernel_index = (w * param.kernelH + h) * inTexture.get_array_size(); - - for (int slice = 0; slice < input_array_size; ++slice) { - - float4 input; - float4 kernel_slice = weights[kernel_stride_z + 0 * kernel_one_output_slice + kernel_index + slice]; - float4 kernel_slice1 = weights[kernel_stride_z + 1 * kernel_one_output_slice + kernel_index + slice]; - - float4 kernel_slice2 = weights[kernel_stride_z + 2 * kernel_one_output_slice + kernel_index + slice]; - - float4 kernel_slice3 = weights[kernel_stride_z + 3 * kernel_one_output_slice + kernel_index + slice]; - - input = inTexture.sample(sample, float2(input_x, input_x), slice); - output.x += dot(input, kernel_slice); - output.x += dot(input, kernel_slice1); - output.x += dot(input, kernel_slice2); - output.x += dot(input, kernel_slice3); - } - } - } - - outTexture.write(output, gid.xy, gid.z); -} - - // conv #pragma mark -- conv kernel void conv_3x3(texture2d_array inTexture [[texture(0)]], diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvTransposeKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvTransposeKernel.metal new file mode 100644 index 0000000000000000000000000000000000000000..5c5a499fceae9061dba30307e2c4ecd3b0e89164 --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConvTransposeKernel.metal @@ -0,0 +1,88 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +using namespace metal; + +struct MetalConvTransposeParam{ + ushort kernelW; + ushort kernelH; + + ushort strideX; + ushort strideY; + + ushort paddingX; + ushort paddingY; + + ushort dilationX; + ushort dilationY; +}; + +kernel void conv_transpose(texture2d_array inTexture [[texture(0)]], + texture2d_array outTexture [[texture(1)]], + constant MetalConvTransposeParam ¶m [[buffer(0)]], + const device float4 *weights [[buffer(1)]], + uint3 gid [[thread_position_in_grid]]){ + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height() || + gid.z >= outTexture.get_array_size()) { + return; + } + + int input_array_size = inTexture.get_array_size(); + + uint kernel_one_output_slice = input_array_size * param.kernelW * param.kernelH; + + uint kernel_stride_z = gid.z * 4 * (kernel_one_output_slice); + + constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); + + float4 output; + + for (int w = 0; w < param.kernelW; ++w) { + int input_x = (gid.x - w * param.dilationX + param.paddingX) / param.strideX; + if (input_x < 0 || input_x >= int(inTexture.get_width())) { + continue; + } + + for (int h = 0; h < param.kernelH; ++h) { + int input_y = (gid.y - h * param.dilationY + param.paddingY) / param.strideY; + if (input_y < 0 || input_y >= int(inTexture.get_height())) { + continue; + } + + uint kernel_index = (w * param.kernelH + h) * inTexture.get_array_size(); + + for (int slice = 0; slice < input_array_size; ++slice) { + + float4 input; + float4 kernel_slice = weights[kernel_stride_z + 0 * kernel_one_output_slice + kernel_index + slice]; + float4 kernel_slice1 = weights[kernel_stride_z + 1 * kernel_one_output_slice + kernel_index + slice]; + + float4 kernel_slice2 = weights[kernel_stride_z + 2 * kernel_one_output_slice + kernel_index + slice]; + + float4 kernel_slice3 = weights[kernel_stride_z + 3 * kernel_one_output_slice + kernel_index + slice]; + + input = inTexture.sample(sample, float2(input_x, input_x), slice); + output.x += dot(input, kernel_slice); + output.x += dot(input, kernel_slice1); + output.x += dot(input, kernel_slice2); + output.x += dot(input, kernel_slice3); + } + } + } + + outTexture.write(output, gid.xy, gid.z); +} + diff --git a/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift index 54fd7ce8e28ca68e0799cd0865564e9d16b648bc..d2ecaccba2c2c0630dd99cc2d46567e1209c16a3 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift @@ -58,12 +58,16 @@ class PoolOp: Operator, PoolParam

>, Runable, } func delogOutput() { - print("pool2d delog") - let _: P? = para.input.metalTexture.logDesc(header: "pool2d input: ", stridable: true) - print(para.ksize) - print(para.stride) - print(para.padding) - print(para.poolType) - let _: P? = para.output.metalTexture.logDesc(header: "pool2d output: ", stridable: true) + print(" \(type) output: ") + print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray()) + + +// print("pool2d delog") +// let _: P? = para.input.metalTexture.logDesc(header: "pool2d input: ", stridable: true) +// print(para.ksize) +// print(para.stride) +// print(para.padding) +// print(para.poolType) +// let _: P? = para.output.metalTexture.logDesc(header: "pool2d output: ", stridable: true) } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/PreluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/PreluOp.swift index 44b509eb66be4ada2a5e46af73d1a97011cd9b85..ec1437a3c2f084dbfa0e4922a305db7bd40b5b8f 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/PreluOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/PreluOp.swift @@ -50,8 +50,8 @@ class PreluOp: Operator, PreluParam

>, Runabl } func delogOutput() { - print("softmax delog") - let _: P? = para.input.metalTexture.logDesc(header: "softmax input: ", stridable: false) - let _: P? = para.output.metalTexture.logDesc(header: "softmax output: ", stridable: false) +// print("softmax delog") +// let _: P? = para.input.metalTexture.logDesc(header: "softmax input: ", stridable: false) +// let _: P? = para.output.metalTexture.logDesc(header: "softmax output: ", stridable: false) } } diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift index 5918d7088539345f99ece23a03cc427cfa62771f..c9f054c88af44ac3f5dd453b4696c7988d01fa8f 100644 --- a/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift @@ -43,6 +43,12 @@ class ReluOp: Operator, ReluParam

>, Runable, throw error } } + + func delogOutput() { + print(" \(type) output: ") + print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray()) + } + }