提交 54be8342 编写于 作者: D dolphin8 提交者: GitHub

Merge pull request #619 from dolphin8/metal

reshape & softmax & pool fix
......@@ -95,73 +95,88 @@ kernel void texture2d_to_2d_array(texture2d<float, access::read> inTexture [[tex
outTexture.write(input, gid.xy, 0);
}
// Pooling configuration supplied by the host at buffer index 0.
// The host-side PoolMetalParam struct carries the same seven 32-bit integer
// fields in the same order, so the two layouts must be kept in sync.
struct PoolParam {
    int ksizeX;    // window extent along x
    int ksizeY;    // window extent along y
    int strideX;   // step between windows along x
    int strideY;   // step between windows along y
    int paddingX;  // offset subtracted from the window origin along x
    int paddingY;  // offset subtracted from the window origin along y
    int poolType;  // 0 = max pooling, 1 = average pooling
};

// Pools each slice of `inTexture` into the matching slice of `outTexture`.
//
// One thread produces one output texel (gid.xy) of one array slice (gid.z).
// The window is clipped to the input bounds. Average pooling still divides by
// the full ksizeX * ksizeY area, so clipped (padded) positions count as zero.
// Any other poolType leaves the result at zero.
kernel void pool(texture2d_array<float, access::read> inTexture [[texture(0)]],
                 texture2d_array<float, access::write> outTexture [[texture(1)]],
                 constant PoolParam &pm [[buffer(0)]],
                 uint3 gid [[thread_position_in_grid]]) {
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height() ||
        gid.z >= outTexture.get_array_size()) return;

    // Horizontal extent of the window, clipped to the input width.
    int xmin = int(gid.x) * pm.strideX - pm.paddingX;
    int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width()));
    xmin = max(xmin, 0);

    // Vertical extent of the window. This must use the Y parameters; using the
    // X ones silently breaks every non-square kernel, stride or padding.
    int ymin = int(gid.y) * pm.strideY - pm.paddingY;
    int ymax = min(ymin + pm.ksizeY, int(inTexture.get_height()));
    ymin = max(ymin, 0);

    float4 r = 0;
    if (pm.poolType == 0) {
        // Seed the running maximum with the first texel of the window.
        r = inTexture.read(uint2(xmin, ymin), gid.z);
        for (int x = xmin; x < xmax; x++) {
            for (int y = ymin; y < ymax; y++) {
                r = fmax(r, inTexture.read(uint2(x, y), gid.z));
            }
        }
    } else if (pm.poolType == 1) {
        for (int x = xmin; x < xmax; x++) {
            for (int y = ymin; y < ymax; y++) {
                r += inTexture.read(uint2(x, y), gid.z);
            }
        }
        r /= float(pm.ksizeX * pm.ksizeY);
    }
    outTexture.write(r, gid.xy, gid.z);
}
// Flattens the texel at (0, 0) of every input slice into a row of scalars.
//
// Output column gid.x takes component (gid.x % 4) of slice (gid.x / 4) and
// stores it in the first component of the output texel; the remaining three
// components are zeroed.
// NOTE(review): only input texel (0, 0) of each slice is read, so this
// presumably expects a 1x1xC input -- confirm against the caller.
kernel void reshape(texture2d_array<float, access::read> inTexture [[texture(0)]],
                    texture2d<float, access::write> outTexture [[texture(1)]],
                    uint3 gid [[thread_position_in_grid]]) {
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height()) return;

    const uint slice = gid.x / 4;
    const uint component = gid.x % 4;

    const float4 packed = inTexture.read(uint2(0, 0), slice);
    const float4 r = float4(packed[component], 0.0, 0.0, 0.0);

    // outTexture is a plain texture2d: a third argument to write() would be a
    // mip level, not an array slice, so gid.z must not be passed here.
    outTexture.write(r, gid.xy);
}
// Softmax over the first component of the texels in row 0 of `inTexture`.
//
// Every thread recomputes the row maximum and the sum of exponentials, then
// writes the normalized value for its own texel. The maximum is subtracted
// before exponentiating to keep exp() from overflowing. Only the first
// component of the output texel is replaced; the other three pass through.
// NOTE(review): the reduction always scans row 0, so this presumably expects
// a one-row input -- confirm against the caller.
kernel void softmax(texture2d<float, access::read> inTexture [[texture(0)]],
                    texture2d<float, access::write> outTexture [[texture(1)]],
                    uint3 gid [[thread_position_in_grid]]) {
    if (gid.x >= outTexture.get_width() ||
        gid.y >= outTexture.get_height()) return;

    const int xsize = inTexture.get_width();

    // Both textures are plain texture2d: the optional second argument of
    // read() and third argument of write() is a mip level, not an array
    // slice, so gid.z must not be passed to them.
    float maxv = inTexture.read(uint2(0, 0))[0];
    for (int x = 0; x < xsize; x++) {
        maxv = max(maxv, inTexture.read(uint2(x, 0))[0]);
    }

    float sum = 0;
    for (int x = 0; x < xsize; x++) {
        sum += exp(inTexture.read(uint2(x, 0))[0] - maxv);
    }

    float4 rr = inTexture.read(gid.xy);
    rr[0] = exp(rr[0] - maxv) / sum;
    outTexture.write(rr, gid.xy);
}
......@@ -14,6 +14,16 @@
import Foundation
/// Host-side parameter block for the `pool` Metal kernel.
///
/// An instance is copied to buffer index 0 with `setBytes`, where the kernel
/// reads it as its `PoolParam` struct. The field order and the 32-bit integer
/// types here line up with that struct; keep the two declarations in sync.
struct PoolMetalParam {
// Pooling window size; filled from `param.ksize[0]` and `param.ksize[1]`.
let ksizeX: Int32
let ksizeY: Int32
// Window step; filled from `param.stride[0]` and `param.stride[1]`.
let strideX: Int32
let strideY: Int32
// Padding subtracted from the window origin; filled from `param.padding[0]` and `param.padding[1]`.
let paddingX: Int32
let paddingY: Int32
// Pooling mode as the kernel interprets it: 0 takes the maximum, 1 takes the average.
let poolType: Int32
}
class PoolKernel<P: PrecisionType>: Kernel, Computable{
func compute(commandBuffer: MTLCommandBuffer, param: PoolParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
......@@ -22,9 +32,6 @@ class PoolKernel<P: PrecisionType>: Kernel, Computable{
print("Pool compute")
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(UnsafeRawPointer(param.ksize), length: param.ksize.count * 4, index: 0)
encoder.setBytes(UnsafeRawPointer(param.stride), length: param.stride.count * 4, index: 1)
encoder.setBytes(UnsafeRawPointer(param.padding), length: param.padding.count * 4, index: 2)
var poolType: Int32
switch param.poolType {
case "max":
......@@ -34,7 +41,17 @@ class PoolKernel<P: PrecisionType>: Kernel, Computable{
default:
throw PaddleMobileError.predictError(message: " unknown pooltype " + param.poolType)
}
encoder.setBytes(&poolType, length: 4, index: 3)
var pmp = PoolMetalParam.init(
ksizeX: param.ksize[0],
ksizeY: param.ksize[1],
strideX: param.stride[0],
strideY: param.stride[1],
paddingX: param.padding[0],
paddingY: param.padding[1],
poolType: poolType
)
encoder.setBytes(&pmp, length: MemoryLayout<PoolMetalParam>.size, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
......
......@@ -26,6 +26,7 @@ class ReshapeKernel<P: PrecisionType>: Kernel, Computable{
print("Reshape compute")
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
}
......@@ -23,6 +23,7 @@ class SoftmaxKernel<P: PrecisionType>: Kernel, Computable{
print("softmax compute")
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1)
encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
encoder.endEncoding()
}
......
......@@ -42,4 +42,9 @@ class ReshapeOp<P: PrecisionType>: Operator<ReshapeKernel<P>, ReshapeParam<P>>,
throw error
}
}
/// Debug helper: prints a marker line, then asks the input and output Metal
/// textures to log themselves via `logDesc`, each under its own header.
func delogOutput() {
print("reshape delog")
// The results are discarded; the `P?` annotation presumably only pins the
// generic element type that `logDesc` should use -- confirm against its declaration.
let _: P? = para.input.metalTexture.logDesc(header: "reshape input: ", stridable: false)
let _: P? = para.output.metalTexture.logDesc(header: "reshape output: ", stridable: false)
}
}
......@@ -31,7 +31,7 @@ class SoftmaxParam<P: PrecisionType>: OpParam {
class SoftmaxOp<P: PrecisionType>: Operator<SoftmaxKernel<P>, SoftmaxParam<P>>, Runable, Creator, InferShaperable{
/// Shape inference for softmax: gives the output the same dimensions as the input.
// NOTE(review): this listing carries both the live assignment and a
// commented-out copy of it; confirm which of the two is meant to remain.
func inferShape() {
para.output.dim = para.input.dim
// para.output.dim = para.input.dim
}
typealias OpType = SoftmaxOp<P>
......@@ -42,4 +42,9 @@ class SoftmaxOp<P: PrecisionType>: Operator<SoftmaxKernel<P>, SoftmaxParam<P>>,
throw error
}
}
/// Debug helper: prints a marker line, then asks the input and output Metal
/// textures to log themselves via `logDesc`, each under its own header.
func delogOutput() {
print("softmax delog")
// The results are discarded; the `P?` annotation presumably only pins the
// generic element type that `logDesc` should use -- confirm against its declaration.
let _: P? = para.input.metalTexture.logDesc(header: "softmax input: ", stridable: false)
let _: P? = para.output.metalTexture.logDesc(header: "softmax output: ", stridable: false)
}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册