提交 0d00c31a 编写于 作者: L liuruilong

correct buffer

上级 3df1380c
...@@ -29,11 +29,11 @@ class ViewController: UIViewController { ...@@ -29,11 +29,11 @@ class ViewController: UIViewController {
// let queue: MTLCommandQueue // let queue: MTLCommandQueue
func scaleTexture(queue: MTLCommandQueue, input: MTLTexture, complete: @escaping (MTLTexture) -> Void) { func scaleTexture(queue: MTLCommandQueue, input: MTLTexture, complete: @escaping (MTLTexture) -> Void) {
let tmpTextureDes = MTLTextureDescriptor.init() let tmpTextureDes = MTLTextureDescriptor.init()
tmpTextureDes.width = 227 tmpTextureDes.width = 224
tmpTextureDes.height = 227 tmpTextureDes.height = 224
tmpTextureDes.depth = 1 tmpTextureDes.depth = 1
tmpTextureDes.usage = [.shaderRead, .shaderWrite] tmpTextureDes.usage = [.shaderRead, .shaderWrite]
tmpTextureDes.pixelFormat = .rgba16Float tmpTextureDes.pixelFormat = .rgba32Float
tmpTextureDes.textureType = .type2D tmpTextureDes.textureType = .type2D
tmpTextureDes.storageMode = .shared tmpTextureDes.storageMode = .shared
tmpTextureDes.cpuCacheMode = .defaultCache tmpTextureDes.cpuCacheMode = .defaultCache
...@@ -64,23 +64,18 @@ class ViewController: UIViewController { ...@@ -64,23 +64,18 @@ class ViewController: UIViewController {
} }
scaleTexture(queue: queue!, input: inTexture) { (inputTexture) in scaleTexture(queue: queue!, input: inTexture) { (inputTexture) in
let loader = Loader<Float16>.init() let loader = Loader<Float32>.init()
do { do {
let modelPath = Bundle.main.path(forResource: "model", ofType: nil) ?! "model null" let modelPath = Bundle.main.path(forResource: "model", ofType: nil) ?! "model null"
let paraPath = Bundle.main.path(forResource: "params", ofType: nil) ?! "para null" let paraPath = Bundle.main.path(forResource: "params", ofType: nil) ?! "para null"
let program = try loader.load(device: self.device, modelPath: modelPath, paraPath: paraPath) let program = try loader.load(device: self.device, modelPath: modelPath, paraPath: paraPath)
let executor = try Executor<Float16>.init(inDevice: self.device, inQueue: queue!, inProgram: program) let executor = try Executor<Float32>.init(inDevice: self.device, inQueue: queue!, inProgram: program)
let output = try executor.predict(input: inputTexture, expect: [1, 227, 227, 3]) let output = try executor.predict(input: inputTexture, expect: [1, 224, 224, 3])
// print(output) // print(output)
} catch let error { } catch let error {
print(error) print(error)
} }
} }
} }
} }
...@@ -93,7 +93,7 @@ public extension MTLTexture { ...@@ -93,7 +93,7 @@ public extension MTLTexture {
print("texture: \(self)") print("texture: \(self)")
if textureType == .type2DArray { if textureType == .type2DArray {
for i in 0..<arrayLength{ for i in 0..<arrayLength{
var str: String = "slice: \(i): " var str: String = "slice: \(i): \n"
let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<T>.size, alignment: MemoryLayout<T>.alignment) let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<T>.size, alignment: MemoryLayout<T>.alignment)
let bytesPerRow = width * depth * 4 * MemoryLayout<T>.size let bytesPerRow = width * depth * 4 * MemoryLayout<T>.size
let bytesPerImage = width * height * depth * 4 * MemoryLayout<T>.size let bytesPerImage = width * height * depth * 4 * MemoryLayout<T>.size
...@@ -142,8 +142,25 @@ public extension MTLTexture { ...@@ -142,8 +142,25 @@ public extension MTLTexture {
} }
public extension MTLBuffer {
    /// Prints a debug dump of the buffer's contents, interpreted as consecutive
    /// elements of type `T`.
    ///
    /// - Parameters:
    ///   - header: Line printed before the dump.
    ///   - stridable: When `true` and the buffer holds more than 1000 elements,
    ///     only every `(count / 100)`-th element is printed; otherwise every
    ///     element is printed.
    /// - Returns: Always `nil`. The optional return type exists only so callers
    ///   can pick `T` through the result annotation, e.g.
    ///   `let _: Float32? = buffer.logDesc(header: "bias: ")`.
    func logDesc<T>(header: String = "", stridable: Bool = true) -> T? {
        print(header)
        print("MTLBuffer: \(self) ")

        // `length` is in bytes; elements are laid out `stride` bytes apart, so
        // the element count (and every index used below) must be derived from
        // the stride. `stride` is also never zero, unlike `size`.
        let count = length / MemoryLayout<T>.stride
        let elements = contents().assumingMemoryBound(to: T.self)

        // Sample large buffers; `count > 1000` guarantees a step of at least 10.
        let step = (stridable && count > 1000) ? count / 100 : 1

        var str = ""
        for index in stride(from: 0, to: count, by: step) {
            str += " \(elements[index])"
        }
        print(str)
        return nil
    }
}
......
...@@ -55,17 +55,8 @@ public class Executor<P: PrecisionType> { ...@@ -55,17 +55,8 @@ public class Executor<P: PrecisionType> {
device = inDevice device = inDevice
queue = inQueue queue = inQueue
for block in inProgram.programDesc.blocks { for block in inProgram.programDesc.blocks {
// for i in 0..<2 { for i in 0..<2 {
// let op = block.ops[i] let op = block.ops[i]
// do {
// let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope)
// op.inferShape()
// ops.append(op)
// } catch let error {
// throw error
// }
// }
for op in block.ops {
do { do {
let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope) let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope)
op.inferShape() op.inferShape()
...@@ -74,6 +65,15 @@ public class Executor<P: PrecisionType> { ...@@ -74,6 +65,15 @@ public class Executor<P: PrecisionType> {
throw error throw error
} }
} }
// for op in block.ops {
// do {
// let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: op, scope: inProgram.scope)
// op.inferShape()
// ops.append(op)
// } catch let error {
// throw error
// }
// }
} }
} }
...@@ -95,9 +95,9 @@ public class Executor<P: PrecisionType> { ...@@ -95,9 +95,9 @@ public class Executor<P: PrecisionType> {
buffer.addCompletedHandler { (commandbuffer) in buffer.addCompletedHandler { (commandbuffer) in
// for op in self.ops { for op in self.ops {
// op.delogOutput() op.delogOutput()
// } }
let afterDate = Date.init() let afterDate = Date.init()
print(" encoder end ! time: \(afterDate.timeIntervalSince(beforeDate))") print(" encoder end ! time: \(afterDate.timeIntervalSince(beforeDate))")
...@@ -114,7 +114,6 @@ public class Executor<P: PrecisionType> { ...@@ -114,7 +114,6 @@ public class Executor<P: PrecisionType> {
throw PaddleMobileError.netError(message: "output var type error") throw PaddleMobileError.netError(message: "output var type error")
} }
return output return output
} }
......
...@@ -50,7 +50,7 @@ public class Loader<P: PrecisionType> { ...@@ -50,7 +50,7 @@ public class Loader<P: PrecisionType> {
return pointee return pointee
} }
_ = pointerReader(type: UInt32.self) let _ = pointerReader(type: UInt32.self)
let lodLevel = pointerReader(type: UInt64.self) let lodLevel = pointerReader(type: UInt64.self)
for _ in 0..<lodLevel { for _ in 0..<lodLevel {
let size = pointerReader(type: UInt64.self) let size = pointerReader(type: UInt64.self)
...@@ -62,6 +62,7 @@ public class Loader<P: PrecisionType> { ...@@ -62,6 +62,7 @@ public class Loader<P: PrecisionType> {
let _ = pointerReader(type: UInt32.self) let _ = pointerReader(type: UInt32.self)
let tensorDescSize = pointerReader(type: Int32.self) let tensorDescSize = pointerReader(type: Int32.self)
fseek(file, Int(tensorDescSize), SEEK_CUR) fseek(file, Int(tensorDescSize), SEEK_CUR)
nowIndex += Int(tensorDescSize) nowIndex += Int(tensorDescSize)
...@@ -70,21 +71,21 @@ public class Loader<P: PrecisionType> { ...@@ -70,21 +71,21 @@ public class Loader<P: PrecisionType> {
*/ */
//现在模型传入模型为 Float 类型, 这块应该根据模型来 //现在模型传入模型为 Float 类型, 这块应该根据模型来
let tmpCapacity = MemoryLayout<Float>.size * tensor.numel() // let tmpCapacity = MemoryLayout<Float>.size * tensor.numel()
let tmpPointer = UnsafeMutablePointer<Float>.allocate(capacity: tmpCapacity); // let tmpPointer = UnsafeMutablePointer<Float>.allocate(capacity: tmpCapacity);
let bytesRead = fread(tensor.data.pointer, 1, tensor.data.size, file)
// let bytesRead = fread(tensor.data.pointer, 1, tensor.data.size, file) guard bytesRead == tensor.data.size else {
// guard bytesRead == tensor.data.size else { throw PaddleMobileError.loaderError(message: "param read size error")
// throw PaddleMobileError.loaderError(message: "param read size error") }
// }
// TODO: use script to convert // TODO: use script to convert
let bytesRead = fread(tmpPointer, 1, tmpCapacity, file) // let bytesRead = fread(tmpPointer, 1, tmpCapacity, file)
for i in 0..<tensor.numel() { // for i in 0..<tensor.numel() {
tensor.data[i] = P.init(inFloat: tmpPointer[i]) // tensor.data[i] = P.init(inFloat: tmpPointer[i])
} // }
tmpPointer.deinitialize(count: tmpCapacity) // tmpPointer.deinitialize(count: tmpCapacity)
tmpPointer.deallocate() // tmpPointer.deallocate()
nowIndex += bytesRead nowIndex += bytesRead
} }
......
...@@ -107,7 +107,16 @@ class ConvAddBatchNormReluOp<P: PrecisionType>: Operator<ConvAddBatchNormReluKer ...@@ -107,7 +107,16 @@ class ConvAddBatchNormReluOp<P: PrecisionType>: Operator<ConvAddBatchNormReluKer
} }
func delogOutput() { func delogOutput() {
let _: P? = para.input.metalTexture.logDesc(header: "conv add batchnorm relu input: ", stridable: false)
para.filter.logDataPointer(header: "filter data pointer: ")
print("filter: \(para.filter)")
print("biase: \(para.bias)")
let _: P? = para.newBiase?.logDesc(header: "new biase: ", stridable: false)
let _: P? = para.newScale?.logDesc(header: "new scale: ", stridable: false)
let _: P? = para.output.metalTexture.logDesc(header: "conv add batchnorm relu output: ", stridable: true)
} }
} }
...@@ -61,8 +61,8 @@ class FeedOp<P: PrecisionType>: Operator<Texture2DTo2DArrayKernel<P>, FeedParam< ...@@ -61,8 +61,8 @@ class FeedOp<P: PrecisionType>: Operator<Texture2DTo2DArrayKernel<P>, FeedParam<
func delogOutput() { func delogOutput() {
// para.input.mtlTexture.logDesc() // para.input.mtlTexture.logDesc()
let _: Float16? = para.input.mtlTexture.logDesc(header: "feed input: ") // let _: P? = para.input.mtlTexture.logDesc(header: "feed input: ", stridable: true)
let _: Float16? = para.output.metalTexture.logDesc(header: "feed output: ") // let _: P? = para.output.metalTexture.logDesc(header: "feed output: ", stridable: true)
} }
} }
...@@ -29,7 +29,7 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable { ...@@ -29,7 +29,7 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable {
let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self) let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self)
for i in 0..<param.variance.buffer.length/MemoryLayout<P>.stride { for i in 0..<param.variance.buffer.length/MemoryLayout<P>.stride {
let inv = pow(Float32.init(varianceContents[i]) + param.epsilon, 0.5) let inv = 1.0/pow(Float32.init(varianceContents[i]) + param.epsilon, 0.5)
invs.append(P(inv)) invs.append(P(inv))
} }
...@@ -59,7 +59,6 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable { ...@@ -59,7 +59,6 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable {
} }
print("ConvAddBatchNormReluKernel compute") print("ConvAddBatchNormReluKernel compute")
encoder.setTexture(param.input.metalTexture, index: 0) encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setTexture(param.output.metalTexture, index: 1) encoder.setTexture(param.output.metalTexture, index: 1)
encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0) encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
......
...@@ -59,13 +59,56 @@ kernel void conv3x3(texture2d_array<half, access::sample> inTexture [[texture(0) ...@@ -59,13 +59,56 @@ kernel void conv3x3(texture2d_array<half, access::sample> inTexture [[texture(0)
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
kernel void conv_add_batch_norm_relu_3x3(texture2d_array<half, access::sample> inTexture [[texture(0)]], //kernel void conv_add_batch_norm_relu_3x3(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]], // texture2d_array<half, access::write> outTexture [[texture(1)]],
// constant MetalConvParam &param [[buffer(0)]],
// const device half4 *weights [[buffer(1)]],
// const device half4 *biase [[buffer(2)]],
// const device half4 *new_scale [[buffer(3)]],
// const device half4 *new_biase [[buffer(4)]],
// uint3 gid [[thread_position_in_grid]]) {
//
// if (gid.x >= outTexture.get_width() ||
// gid.y >= outTexture.get_height() ||
// gid.z >= outTexture.get_array_size()) {
// return;
// }
//
// short2 posInInput = short2(gid.xy) + short2(param.offsetX, param.offsetY);
// constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
// const uint wightSliceCount = 36;
// uint weithTo = gid.z * wightSliceCount * inTexture.get_array_size();
// half4 output = 0.0;
// for (uint i = 0; i < inTexture.get_array_size(); ++i) {
// half4 input[9];
// input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
// input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
// input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
// input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
// input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
// input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
// input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
// input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
// input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
// for (int j = 0; j < 9; ++j) {
// half4 weight = weights[weithTo + wightSliceCount * i + j * 4];
// output += dot(input[j], weight);
// }
// }
//
// output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0h);
// outTexture.write(output, gid.xy, gid.z);
//
//}
kernel void conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]], constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]], const device float4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]], const device float4 *biase [[buffer(2)]],
const device half4 *new_scale [[buffer(3)]], const device float4 *new_scale [[buffer(3)]],
const device half4 *new_biase [[buffer(4)]], const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) { uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() || if (gid.x >= outTexture.get_width() ||
...@@ -78,9 +121,9 @@ kernel void conv_add_batch_norm_relu_3x3(texture2d_array<half, access::sample> i ...@@ -78,9 +121,9 @@ kernel void conv_add_batch_norm_relu_3x3(texture2d_array<half, access::sample> i
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero); constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint wightSliceCount = 36; const uint wightSliceCount = 36;
uint weithTo = gid.z * wightSliceCount * inTexture.get_array_size(); uint weithTo = gid.z * wightSliceCount * inTexture.get_array_size();
half4 output = 0.0; float4 output = 0.0;
for (uint i = 0; i < inTexture.get_array_size(); ++i) { for (uint i = 0; i < inTexture.get_array_size(); ++i) {
half4 input[9]; float4 input[9];
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i); input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i); input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i); input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
...@@ -91,12 +134,12 @@ kernel void conv_add_batch_norm_relu_3x3(texture2d_array<half, access::sample> i ...@@ -91,12 +134,12 @@ kernel void conv_add_batch_norm_relu_3x3(texture2d_array<half, access::sample> i
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i); input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i); input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) { for (int j = 0; j < 9; ++j) {
half4 weight = weights[weithTo + wightSliceCount * i + j * 4]; float4 weight = weights[weithTo + wightSliceCount * i + j * 4];
output += dot(input[j], weight); output += dot(input[j], weight);
} }
} }
output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0h); output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z); outTexture.write(output, gid.xy, gid.z);
} }
......
...@@ -73,16 +73,28 @@ kernel void batchnorm(texture2d_array<half, access::read> inTexture [[texture(0) ...@@ -73,16 +73,28 @@ kernel void batchnorm(texture2d_array<half, access::read> inTexture [[texture(0)
outTexture.write(input, gid.xy, gid.z); outTexture.write(input, gid.xy, gid.z);
} }
kernel void texture2d_to_2d_array(texture2d<half, access::read> inTexture [[texture(0)]], //kernel void texture2d_to_2d_array(texture2d<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]], // texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) { // uint3 gid [[thread_position_in_grid]]) {
// if (gid.x >= inTexture.get_width() ||
// gid.y >= inTexture.get_height()){
// return;
// }
// const half4 input = inTexture.read(gid.xy);
// outTexture.write(input, gid.xy, 0);
//}
kernel void texture2d_to_2d_array(texture2d<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() || if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height()){ gid.y >= inTexture.get_height()){
return; return;
} }
const half4 input = inTexture.read(gid.xy); const float4 input = inTexture.read(gid.xy);
outTexture.write(input, gid.xy, 0); outTexture.write(input, gid.xy, 0);
} }
...@@ -32,6 +32,9 @@ extension Int64: Attr { ...@@ -32,6 +32,9 @@ extension Int64: Attr {
extension Array: Attr { extension Array: Attr {
} }
// Declares String as an Attr with no additional members.
// NOTE(review): presumably needed so the `.string` case of attrWithProtoDesc
// can return a String value as an Attr — confirm against the type of `attrDesc.s`.
extension String: Attr {
}
func attrWithProtoDesc(attrDesc: PaddleMobile_Framework_Proto_OpDesc.Attr) -> Attr { func attrWithProtoDesc(attrDesc: PaddleMobile_Framework_Proto_OpDesc.Attr) -> Attr {
switch attrDesc.type { switch attrDesc.type {
case .boolean: case .boolean:
...@@ -39,7 +42,7 @@ func attrWithProtoDesc(attrDesc: PaddleMobile_Framework_Proto_OpDesc.Attr) -> At ...@@ -39,7 +42,7 @@ func attrWithProtoDesc(attrDesc: PaddleMobile_Framework_Proto_OpDesc.Attr) -> At
case .int: case .int:
return Int(attrDesc.i) return Int(attrDesc.i)
case .string: case .string:
return attrDesc.strings return attrDesc.s
case .long: case .long:
return attrDesc.l return attrDesc.l
case .float: case .float:
......
...@@ -38,7 +38,7 @@ class Tensor<P: PrecisionType>: Tensorial { ...@@ -38,7 +38,7 @@ class Tensor<P: PrecisionType>: Tensorial {
pointer = inPointer pointer = inPointer
} }
let size: Int let size: Int
fileprivate var pointer: UnsafeMutablePointer<P> var pointer: UnsafeMutablePointer<P>
subscript(index: Int) -> P{ subscript(index: Int) -> P{
get { get {
return pointer[index] return pointer[index]
...@@ -104,7 +104,7 @@ class Tensor<P: PrecisionType>: Tensorial { ...@@ -104,7 +104,7 @@ class Tensor<P: PrecisionType>: Tensorial {
for _ in 0..<dim[0] * dim[1] * dim[2] { for _ in 0..<dim[0] * dim[1] * dim[2] {
for j in 0..<paddedC { for j in 0..<paddedC {
if j < C { if j < C {
dstPtr?[j] = data.pointer[j] dstPtr?[j] = tmpPointer[j]
} }
} }
tmpPointer += C tmpPointer += C
...@@ -134,7 +134,7 @@ class Tensor<P: PrecisionType>: Tensorial { ...@@ -134,7 +134,7 @@ class Tensor<P: PrecisionType>: Tensorial {
for h in 0..<H{ for h in 0..<H{
for w in 0..<W{ for w in 0..<W{
for c in 0..<C{ for c in 0..<C{
newPtr[index] = data.pointer[n * CXHXW + c * HXW + h * w + w] newPtr[index] = data.pointer[n * CXHXW + c * HXW + h * W + w]
index += 1 index += 1
} }
} }
...@@ -146,27 +146,25 @@ class Tensor<P: PrecisionType>: Tensorial { ...@@ -146,27 +146,25 @@ class Tensor<P: PrecisionType>: Tensorial {
extension Tensor { extension Tensor {
var debugDescription: String { var debugDescription: String {
var str = "" var str = "dim: \(dim) \n"
str += "MTLBuffer: \(self.buffer) \n"
// for i in 0..<buffer.length/MemoryLayout<P>.stride { for i in 0..<buffer.length/MemoryLayout<P>.size {
// str += " \(buffer.contents().assumingMemoryBound(to: P.self)[i])" str += " \(buffer.contents().assumingMemoryBound(to: P.self)[i])"
// } }
return str return str
}
// var str = ""
// str += "Dim: \(dim) \n value:[ " func logDataPointer(header: String = "") {
// if data.size < 20 { print(header)
// for d in 0..<data.size { var str = ""
// str += " \(data[d]) " str += "data size: \(data.size) \n"
// } str += "dim: \(dim) \n"
// } else { for i in 0..<numel() {
// for d in stride(from: 0, to: data.size, by: data.size/20) { str += " \(data.pointer[i])"
// str += " \(data[d]) " }
// } print(str)
// }
// str += " ]"
// return str
} }
var description: String { var description: String {
......
...@@ -69,7 +69,7 @@ public class Texture<P: PrecisionType>: Tensorial { ...@@ -69,7 +69,7 @@ public class Texture<P: PrecisionType>: Tensorial {
if MemoryLayout<P>.size == 1 { if MemoryLayout<P>.size == 1 {
tmpTextureDes.pixelFormat = .rgba8Unorm tmpTextureDes.pixelFormat = .rgba8Unorm
} else if MemoryLayout<P>.size == 2 { } else if MemoryLayout<P>.size == 2 {
tmpTextureDes.pixelFormat = .rgba16Float tmpTextureDes.pixelFormat = .rgba32Float
} else if MemoryLayout<P>.size == 4 { } else if MemoryLayout<P>.size == 4 {
// tmpTextureDes.pixelFormat = .r32Float // tmpTextureDes.pixelFormat = .r32Float
tmpTextureDes.pixelFormat = .rgba32Float tmpTextureDes.pixelFormat = .rgba32Float
...@@ -130,7 +130,7 @@ extension Texture { ...@@ -130,7 +130,7 @@ extension Texture {
public var debugDescription: String{ public var debugDescription: String{
var str = "" var str = ""
str += "Dim: \(dim) \n value:[ " str += "Dim: \(dim) \n value:[ "
// str += "\(metalTexture)" str += "\(metalTexture)"
str += " ]" str += " ]"
return str return str
} }
......
...@@ -19,7 +19,10 @@ limitations under the License. */ ...@@ -19,7 +19,10 @@ limitations under the License. */
int main() { int main() {
paddle_mobile::Loader<paddle_mobile::CPU> loader; paddle_mobile::Loader<paddle_mobile::CPU> loader;
auto time1 = time(); auto time1 = time();
auto program = loader.Load(g_mobilenet, true); // auto program = loader.Load(g_mobilenet_combine, true);
auto program = loader.Load(g_mobilenet_combine + "/model",
g_mobilenet_combine + "/params", true);
auto time2 = time(); auto time2 = time();
DLOG << "load cost :" << time_diff(time1, time1) << "ms"; DLOG << "load cost :" << time_diff(time1, time1) << "ms";
paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, true); paddle_mobile::Executor<paddle_mobile::CPU> executor(program, 1, true);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册