diff --git a/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift index 01c9c6c1fc277be1ed5fa6ace6774fc7f03f2de9..11bd1b67e06e3b2d4bcdb100b7afad3848644fc9 100644 --- a/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift +++ b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift @@ -71,7 +71,128 @@ extension MTLDevice { return buffer! } + func texture2tensor_loop
(texture: MTLTexture, cb: ([Int], P)->Void) -> Void { + let bpR = texture.width * 4 * MemoryLayout
.size
+ let bpI = texture.height * bpR
+ let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: texture.width, height: texture.height, depth: 1))
+ for i in 0.. .allocate(capacity: bpI)
+ texture.getBytes(pointer, bytesPerRow: bpR, bytesPerImage: bpI, from: region, mipmapLevel: 0, slice: i)
+ for tx in 0.. .allocate(capacity: ndim[1] * ndim[2] * 4 * MemoryLayout .size)
- texture.getBytes(pointer, bytesPerRow: bpR, bytesPerImage: bpI, from: region, mipmapLevel: 0, slice: i)
-
- for h in 0.. >, Run
func delogOutput() {
print(" \(type) output: ")
- let padToFourDim = para.output.padToFourDim
- if para.output.transpose == [0, 1, 2, 3] {
- let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]))
- print(outputArray.strideArray())
- } else if para.output.transpose == [0, 2, 3, 1] {
- print(para.output.metalTexture.toTensor(dim: (n: padToFourDim[0], c: padToFourDim[1], h: padToFourDim[2], w: padToFourDim[3])).strideArray())
- } else {
- fatalError(" not implemet")
- }
+
+ let device = para.output.metalTexture!.device
+ let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose)
+ print(outputArray.strideArray())
}
}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift
index caa56ba256609f81a109f088824a0e7f9a1532b0..dad8d0c6ac2e5a93273573473c700179f8b90a37 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift
@@ -20,12 +20,13 @@ class BatchNormKernel input [[te
if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) {
r = input.read(gid.xy, gid.z);
} else {
- float w = gid.x * pm.ratio_w;
- float h = gid.y * pm.ratio_h;
+ P w = gid.x * pm.ratio_w;
+ P h = gid.y * pm.ratio_h;
uint w0 = w, h0 = h;
uint w1 = w0 + 1, h1 = h0 + 1;
P w1lambda = w - w0, h1lambda = h - h0;
@@ -26,7 +26,8 @@ kernel void FUNC(bilinear_interp, P)(texture2d_array input [[te
VECTOR(P, 4) r1 = input.read(uint2(w1, h0), gid.z);
VECTOR(P, 4) r2 = input.read(uint2(w0, h1), gid.z);
VECTOR(P, 4) r3 = input.read(uint2(w1, h1), gid.z);
- r = h2lambda * (w2lambda * r0 + w1lambda * r1) + h1lambda * (w2lambda * r2 + w1lambda * r3);
+ r = h2lambda * (w2lambda * r0 + w1lambda * r1)
+ + h1lambda * (w2lambda * r2 + w1lambda * r3);
}
output.write(r, gid.xy, gid.z);
}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal
index c4eca3e1af7565b3dbef4646b80beb5a2725c714..394cf89db09d47b0d3c87ff124c21a93962c0972 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/BilinearInterp.metal
@@ -16,8 +16,6 @@
using namespace metal;
struct bilinear_interp_param {
-// int32_t out_h;
-// int32_t out_w;
float ratio_h;
float ratio_w;
};
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal
index 9858cf9c3c035364ed13bae5c131f8a4a9f199fc..40bae035c097b5ab386d78520b6b04f074eb2fee 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/Common.metal
@@ -17,16 +17,16 @@ using namespace metal;
inline void xyzn2abcd_1(int xyzn[4], int abcd[4]) {
- abcd[0] = abcd[1] = abcd[2] = 1;
+ abcd[0] = abcd[1] = abcd[2] = 0;
abcd[3] = xyzn[0] * 4 + xyzn[3];
}
inline void xyzn2abcd_2(int xyzn[4], int abcd[4]) {
- abcd[0] = abcd[1] = 1;
+ abcd[0] = abcd[1] = 0;
abcd[2] = xyzn[1];
abcd[3] = xyzn[0] * 4 + xyzn[3];
}
inline void xyzn2abcd_3(int xyzn[4], int abcd[4]) {
- abcd[0] = 1;
+ abcd[0] = 0;
abcd[3] = xyzn[0];
abcd[2] = xyzn[1];
abcd[1] = xyzn[2] * 4 + xyzn[3];
@@ -40,15 +40,15 @@ inline void xyzn2abcd_4(int C, int xyzn[4], int abcd[4]) {
}
inline void abcd2xyzn_1(int abcd[4], int xyzn[4]) {
- xyzn[1] = xyzn[2] = 1;
+ xyzn[1] = xyzn[2] = 0;
xyzn[0] = abcd[3] / 4;
- xyzn[1] = abcd[3] % 4;
+ xyzn[3] = abcd[3] % 4; // inverse of xyzn2abcd_1 (abcd[3] = x * 4 + n): the remainder is the lane n, not row y; same fix as abcd2xyzn_2
}
inline void abcd2xyzn_2(int abcd[4], int xyzn[4]) {
- xyzn[2] = 1;
+ xyzn[2] = 0;
xyzn[1] = abcd[2];
xyzn[0] = abcd[3] / 4;
- xyzn[1] = abcd[3] % 4;
+ xyzn[3] = abcd[3] % 4;
}
inline void abcd2xyzn_3(int abcd[4], int xyzn[4]) {
xyzn[0] = abcd[3];
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal
index 0eacaf658b6c5b5e877dc58de289f93bd5873ccf..b62daaa3aa1dd7861a3b00018bc38b409b05d8e0 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ConcatKernel.inc.metal
@@ -122,20 +122,20 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array in0 [[tex
int x = gid.x - pm.offset;
if (x < 0) return;
if (x < pm.vdim[0]) {
- VECTOR(P, 4) r = in0.read(gid.xy, gid.z);
+ VECTOR(P, 4) r = in0.read(gid.xy, gid.z);
out.write(r, gid.xy, gid.z);
return;
}
x -= pm.vdim[0];
if (x < pm.vdim[1]) {
- VECTOR(P, 4) r = in1.read(uint2(x, gid.y), gid.z);
+ VECTOR(P, 4) r = in1.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#if N >= 3
x -= pm.vdim[1];
if (x < pm.vdim[2]) {
- VECTOR(P, 4) r = in2.read(uint2(x, gid.y), gid.z);
+ VECTOR(P, 4) r = in2.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
@@ -143,7 +143,7 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array in0 [[tex
#if N >= 4
x -= pm.vdim[2];
if (x < pm.vdim[3]) {
- VECTOR(P, 4) r = in3.read(uint2(x, gid.y), gid.z);
+ VECTOR(P, 4) r = in3.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
@@ -151,7 +151,7 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array in0 [[tex
#if N >= 5
x -= pm.vdim[3];
if (x < pm.vdim[4]) {
- VECTOR(P, 4) r = in4.read(uint2(x, gid.y), gid.z);
+ VECTOR(P, 4) r = in4.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
@@ -159,7 +159,7 @@ kernel void FUNC(concat, R, N, VV, P)(texture2d_array in0 [[tex
#if N >= 6
x -= pm.vdim[4];
if (x < pm.vdim[5]) {
- VECTOR(P, 4) r = in5.read(uint2(x, gid.y), gid.z);
+ VECTOR(P, 4) r = in5.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal
index 3d6c141210ca58ed55b4f75d32640695bac55c1b..82d512e7095007b61d18158a8b9d04071b2b492b 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/metal/ReshapeKernel.inc.metal
@@ -36,7 +36,6 @@ kernel void FUNC(reshape, RIN, ROUT, P)(texture2d_array inTextu
if (index < count) {
index2abcd(lrp.idim, index, tabcd);
trans(lrp.itrans, tabcd, iabcd);
- abcd2xyzn(iC, iabcd, ixyzn);
#if RIN == 4
abcd2xyzn_4(iC, iabcd, ixyzn);
#else
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift
index 4a27fba9839af5feb709a69a76529c60928a981e..72665c471ff0d1b1a2b7966f45afbf847438138b 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/PriorBoxOp.swift
@@ -72,10 +72,21 @@ class PriorBoxOp
print(" \(type) output: ")
// output
- let outputArray = para.output.metalTexture.float32Array()
- print(outputArray)
+// let outputArray = para.output.metalTexture.float32Array()
+// print(outputArray.strideArray())
+ let device = para.input.metalTexture!.device
+ let boxes:[Float32] = device.texture2tensor(texture: para.output.metalTexture!, dim: para.output.tensorDim.dims, transpose: [2,0,1,3])
+ let variances:[Float32] = device.texture2tensor(texture: para.outputVariances.metalTexture!, dim: para.outputVariances.tensorDim.dims, transpose: [2,0,1,3])
+ print("boxes: ")
+ print(boxes.strideArray())
+ print("variances: ")
+ print(variances.strideArray())
// output
-// print(" \(type) output: ")
+ print(" \(type) output: ")
+
+ print(para.output.metalTexture.realNHWC(dim: (para.output.dim[0], para.output.dim[1], para.output.dim[2], para.output.dim[3])).strideArray())
+// print(para.output.realNHWC().strideArray())
+
// let padToFourDim = para.output.padToFourDim
// if para.output.transpose == [0, 1, 2, 3] {
// let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]), texturePrecision: computePrecision)
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift
index c9f054c88af44ac3f5dd453b4696c7988d01fa8f..ca19b720c6a3e2559668dcfafb2a6fbf853c571c 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift
@@ -47,6 +47,9 @@ class ReluOp >, Runable,
func delogOutput() {
print(" \(type) output: ")
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
+ let device = para.output.metalTexture!.device
+ let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose)
+ print(outputArray.strideArray())
}
}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
index bd257a65f3cbbe0e4ffc866885fc13d2c0bdb909..ac46baca91bd6eedab9241da68a05d08391ec931 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
@@ -69,15 +69,9 @@ class ReshapeOp >,
}
func delogOutput() {
print("reshape delog")
-// let _: P? = para.input.metalTexture.logDesc(header: "reshape input: ", stridable: false)
-//
-// let _: P? = para.output.metalTexture.logDesc(header: "reshape output: ", stridable: false)
- let padToFourDim = para.output.padToFourDim
-
- let outputArray: [Float32] = para.output.metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]))
-// print(para.output.metalTexture.toTensor(dim: (n: padToFourDim[0], c: padToFourDim[1], h: padToFourDim[2], w: padToFourDim[3])).strideArray())
-
+ let device = para.output.metalTexture!.device
+ let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose)
print(outputArray.strideArray())
-
+// print(outputArray)
}
}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift
index 41bf6784f51d648f2decfa62d586b94360bdd4be..b68d38e57340bb74b62f06db5b9e984da162dc1e 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/SplitOp.swift
@@ -64,6 +64,11 @@ class SplitOp >, Runabl
func delogOutput() {
print(" \(type) output: ")
+ let device = para.input.metalTexture!.device
+ for out in para.outputList {
+ let arr: [Float32] = device.texture2tensor(texture: out.metalTexture, dim: out.tensorDim.dims, transpose: out.transpose)
+ print(arr.strideArray())
+ }
}
}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift
index 0213b52bf16fa498835729c5b7e3a65600f7669d..a90aa288b36bfb0eec59b9038813de0960f7aa9a 100644
--- a/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Operators/TransposeOp.swift
@@ -57,6 +57,9 @@ class TransposeOp