Commit 63120052 authored by chenhaoze

compatible with WebGL 1.0; rewrite the op sorting algorithm; fix bugs

Parent 6c40834f
......@@ -151,8 +151,6 @@ export default class imageFeed {
}
}
}
console.log('this is the end of reshapetorgb !!!');
console.dir(result);
return result;
};
......@@ -164,7 +162,6 @@ export default class imageFeed {
* @return {Object} the resized dimensions
*/
reSize(image, params) {
console.log('execute resize!!');
// original image width and height
const width = this.pixelWidth;
const height = this.pixelHeight;
......@@ -192,7 +189,6 @@ export default class imageFeed {
* scale the image, fit it to the target size and center it
*/
resizeAndFitTargetSize(image, params){
console.log('execute resizeAndFitTargetSize!!');
// original image width and height
const width = this.pixelWidth;
const height = this.pixelHeight;
......@@ -249,7 +245,6 @@ export default class imageFeed {
sh = Math.round(sw * this.pixelHeight / this.pixelWidth);
y = Math.floor((targetHeight - sh) / 2);
}
// console.log(x, y, sw, sh);
if (center) {
this.fromPixels2DContext.drawImage(
image, x, y, sw, sh);
......@@ -327,24 +322,18 @@ export default class imageFeed {
data = this.resizeAndFitTargetSize(pixels, opt);
data2 = this.fromPixels2DContext2.getImageData(0, 0, this.pixelWidth, this.pixelHeight);
}
else if (opt.scale) { // backward compatible: with scale set, the short edge is resized to scale
else if (opt.scale) { // resize directly to targetShape (the HumanSeg case)
scaleSize = this.reSize(pixels, opt);
console.dir(scaleSize);
console.dir(pixels);
data = this.getImageData(opt, 0, 0, scaleSize);
data2 = this.fromPixels2DContext2.getImageData(0, 0, this.pixelWidth, this.pixelHeight);
}
else if (opt.targetSize) { // with targetSize set, fit the image into the target width/height (the TinyYolo case)
scaleSize = this.fitToTargetSize(pixels, opt);
data = this.getImageData(opt, 0, 0, scaleSize);
data2 = this.fromPixels2DContext2.getImageData(0, 0, this.pixelWidth, this.pixelHeight);
}
}
if (opt.gray) {
data = grayscale(data);
}
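A minimal sketch of the two feed modes handled above, assuming option objects shaped the way the branches suggest; the concrete numbers, the width/height form of targetSize, and the variable names humansegOpt / tinyYoloOpt are illustrative only:
// scale path: resize directly to targetShape (HumanSeg-style)
const humansegOpt = {scale: 192, targetShape: [1, 3, 192, 192]};
// targetSize path: fit the image into a fixed box, then crop/center (TinyYolo-style)
const tinyYoloOpt = {targetSize: {width: 320, height: 320}, targetShape: [1, 3, 320, 320]};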
......@@ -359,6 +348,7 @@ export default class imageFeed {
else if (opt.targetShape) {
data = this.allReshapeToRGB(data, opt, scaleSize);
}
return [{data: data, shape: opt.shape || opt.targetShape, name: 'image', canvas: data2}];
}
}
......
......@@ -51,6 +51,10 @@ export default class gpu {
console.log('float extension is started or not? ' + !!this.textureFloat);
}
}
this.maxTextureSize = gl.getParameter(gl.MAX_TEXTURE_SIZE);
this.maxTextureImageUnits = gl.getParameter(gl.MAX_TEXTURE_IMAGE_UNITS);
// disable unneeded features
gl.disable(gl.DEPTH_TEST);
gl.disable(gl.STENCIL_TEST);
......@@ -67,14 +71,22 @@ export default class gpu {
this.waits = 0;
console.log('WebGL version is ' + this.version);
console.log('MAX_TEXTURE_SIZE is ' + gl.getParameter(gl.MAX_TEXTURE_SIZE));
console.log('MAX_TEXTURE_IMAGE_UNITS is ' + gl.getParameter(gl.MAX_TEXTURE_IMAGE_UNITS));
console.log('MAX_TEXTURE_SIZE is ' + this.maxTextureSize);
console.log('MAX_TEXTURE_IMAGE_UNITS is ' + this.maxTextureImageUnits);
}
getWebglVersion() {
return this.version;
}
getWebglMaxTextureSize() {
return this.maxTextureSize;
}
getWebglMaxTextureImageUnits() {
return this.maxTextureImageUnits;
}
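A minimal usage sketch for the new getters, assuming the (b*h) x (c*w) texture mapping used by formatReadData further down; the helper name exceedsTextureLimit is illustrative:
function exceedsTextureLimit(gpu, shape) {
    const [b, c, h, w] = shape;               // NCHW
    const max = gpu.getWebglMaxTextureSize(); // e.g. 4096 on many mobile GPUs
    return b * h > max || c * w > max;
}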
initCache() {
// run count
this.times = 0;
......@@ -145,7 +157,6 @@ export default class gpu {
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE);
gl.texImage2D(gl.TEXTURE_2D, // Target, matches bind above.
0, // Level of detail.
this.downloadInternalFormat, // Internal format.
......@@ -346,6 +357,7 @@ export default class gpu {
} else {
// texture = gl.createTexture();
if (isRendered && (iLayer > 0 || (iLayer === 0 && item.tensor !== 'origin'))) {
const tData = this.cacheTextures['' + iLayer];
texture = tData[item.variable + '_' + item.tensor];
} else {
......@@ -361,6 +373,7 @@ export default class gpu {
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE);
if (this.version == 2){
gl.texImage2D(gl.TEXTURE_2D,
0,
this.internalFormat,
......@@ -369,8 +382,27 @@ export default class gpu {
0,
this.textureFormat,
gl.FLOAT,
item.data,
0);
item.data);
}
else {
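// WebGL1 fallback: no single-channel float upload, so each value is expanded
// into the R channel of an RGBA texel (G/B/A are padded with zeros).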
let oneSize = item.width_texture * item.height_texture;
let temp = new Float32Array(item.width_texture * item.height_texture * 4);
for (let i = 0; i < item.data.length; i++){
temp[i*4] = (item.data[i]);
temp[i*4+1] = 0;
temp[i*4+2] = 0;
temp[i*4+3] = 0;
}
gl.texImage2D(gl.TEXTURE_2D,
0,
gl.RGBA,
item.width_texture,
item.height_texture,
0,
gl.RGBA,
gl.FLOAT,
temp);
}
}
}
......@@ -389,7 +421,7 @@ export default class gpu {
// create the texture backing the framebuffer
makeTexure(type, data, opts = {}) {
const gl = this.gl;
let index = this.textureBufferIndex % 2;
let index = int(mod(float(this.textureBufferIndex), 2.0));
let texture = this.textureBuffer[index];
gl.bindTexture(gl.TEXTURE_2D, texture);
......@@ -429,6 +461,7 @@ export default class gpu {
}
createPBO() {
if (this.version == 2){
const gl2 = this.gl;
const buffer = this.pbo;
gl2.bindBuffer(gl2.PIXEL_PACK_BUFFER, buffer);
......@@ -437,42 +470,40 @@ export default class gpu {
gl2.readPixels(0, 0, this.width_texture_out, this.height_texture_out, gl2.RGBA, gl2.FLOAT, 0);
gl2.bindBuffer(gl2.PIXEL_PACK_BUFFER, null);
return buffer;
}
else {
let buffer = new Float32Array(this.width_texture_out * this.height_texture_out * 4);
const gl2 = this.gl;
gl2.readPixels(0, 0, this.width_texture_out, this.height_texture_out, gl2.RGBA, gl2.FLOAT, buffer);
return buffer;
}
}
downloadFoat32TensorFromBuffer(buffer) {
const gl2 = this.gl;
const size = 4 * this.width_texture_out * this.height_texture_out;
if (this.version == 2){
const pixels = new Float32Array(size);
gl2.bindBuffer(gl2.PIXEL_PACK_BUFFER, buffer);
gl2.getBufferSubData(gl2.PIXEL_PACK_BUFFER, 0, pixels);
gl2.bindBuffer(gl2.PIXEL_PACK_BUFFER, null);
// log.start('postprocess-readloop');
// let result = [];
// let offset = 0;
// for (let h = 0; h < this.height_texture_out; h++) {
// // record rows 1 and 2 of the data
// let temp1 = [];
// let temp2 = [];
// for (let w = 0; w < this.width_texture_out; w++) {
// temp1.push(pixels[offset]);
// temp1.push(pixels[offset + 1]);
// temp2.push(pixels[offset + 2]);
// temp2.push(pixels[offset + 3]);
// offset += 4;
// }
// result = result.concat(temp1);
// result = result.concat(temp2);
// }
let result = [];
for (let i = 0; i < this.width_texture_out * this.height_texture_out; i++) {
result.push(pixels[4 * i]);
}
// const result = Array.prototype.slice.call(pixels);
// console.dir(['result', result]);
// log.end('postprocess-readloop');
return result;
}
else {
let pixels = buffer;
let result = [];
for (let i = 0; i < this.width_texture_out * this.height_texture_out; i++) {
result.push(pixels[4 * i]);
}
return result;
}
}
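A hedged usage sketch of the two readback paths above (createPBO and downloadFoat32TensorFromBuffer are the methods defined here; the gpu instance and the preceding render are assumed):
// WebGL2: buffer is a PIXEL_PACK_BUFFER handle; WebGL1: buffer is already a Float32Array.
const buffer = gpu.createPBO();
// Either way, only the R channel of every RGBA texel is kept (pixels[4 * i]).
const tensor = gpu.downloadFoat32TensorFromBuffer(buffer);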
getWebglError(status) {
const gl2 = this.gl;
switch (status) {
......@@ -497,7 +528,7 @@ export default class gpu {
createAndWaitForFence() {
const gl2 = this.gl;
const isFenceEnabled = (gl2.fenceSync !== null);
const isFenceEnabled = (gl2.fenceSync != null);
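// Loose != is deliberate: WebGL1 contexts have no fenceSync at all, so the property is undefined rather than null.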
let isFencePassed = () => true;
if (isFenceEnabled) {
const sync = gl2.fenceSync(gl2.SYNC_GPU_COMMANDS_COMPLETE, 0);
......@@ -531,10 +562,8 @@ export default class gpu {
let pixels = new Float32Array(this.width_texture_out * this.height_texture_out * 4);
// gl.pixelStorei(gl.UNPACK_ALIGNMENT, 1);
const tt2 = +Date.now();
gl.readPixels(0, 0, this.width_texture_out, this.height_texture_out, gl.RGBA, gl.FLOAT, pixels, 0);
gl.readPixels(0, 0, this.width_texture_out, this.height_texture_out, gl.RGBA, gl.FLOAT, pixels);
// console.log('read time this pass: ' + (+Date.now() - tt2) + ',' + (tt2 - tt));
// log.end('后处理-readinside');
// log.start('后处理-readloop');
let result = [];
for (let i = 0; i < this.width_texture_out * this.height_texture_out; i++) {
result.push(pixels[4 * i]);
......
/* eslint-disable */
import GraphExecutor from '../executor/executor';
import IO from '../feed/imageFeed';
import Runtime from '../runtime/runtime';
import OpData from '../utils/opData';
import Factory from '../factory/fshader/factory';
......@@ -92,9 +91,14 @@ export default class Graph {
return;
}
opindex++;
// console.log(opindex);
//if (executor.opData) console.log(executor.opData.iLayer);
executor.execute(this.inst, this.isExecuted);
if (false && executor.opData && opindex >= 184){
console.log('return!');
console.dir(executor);
console.dir(executor.type);
console.dir(this);
return;
}
if (executor.next) {
const id = executor.next;
const next = this.getTensor(id);
......@@ -199,105 +203,63 @@ export default class Graph {
});
}
execute_try(temp, ops, idtoindex, executed, inline, prev){
console.log('execute_try!first look at this op');
console.log(ops[temp]);
let canrun = this.checkifcanrun(temp, ops, idtoindex, executed);
if (canrun === false) {
// console.log('canrun === false!');
var a = inline.pop();
this.execute_try(idtoindex[a.id], ops, idtoindex, executed, inline, prev);
return;
}
if (prev >=0) {
ops[prev].next = ops[temp].id;
}
ops[temp].outputsName.forEach(function(item, index) {
executed[item] = true;
})
let next = this.getNextByOp(ops, ops[temp]);
// console.log('this is its next:');
// console.dir(next);
while (next.length === 1) {
let flag = true;
for (let i = 0; i < next[0].inputsName.length; i++){
if (executed[next[0].inputsName[i]] === false) flag = false;
}
if (flag === false) {
// console.log('can not execute next now! jump to another op:');
if (inline.length === 0) return;
prev = temp;
let a = inline.pop();
// console.dir(a);
ops[temp].next = a.id;
temp = idtoindex[a.id];
this.execute_try(temp, ops, idtoindex, executed, inline, prev);
return;
}
else {
// console.log('now execute next op! it is');
ops[temp].next = next[0].id;
temp = idtoindex[next[0].id];
// console.dir(ops[temp]);
next = this.getNextByOp(ops, ops[temp]);
// console.log('its next is: ');
ops[temp].outputsName.forEach(function(item, index) {
executed[item] = true;
})
// console.dir(next);
}
}
if (next.length > 1){
// console.log('next.length > 1!!!');
for (let i = next.length - 1; i >=0 ; i--){
inline.push(next[i]);
}
var a = inline.pop();
this.execute_try(idtoindex[a.id], ops, idtoindex, executed, inline, temp);
}
return;
}
arrangeMap(ops) {
// console.log('arrangeMap!');
// console.dir(ops);
var idtoindex = {};
var executed = {};
var inline = [];
var inIndex = [];
var idtoindex = {};
let temp = 0;
// console.log('graph ops:');
// console.dir(ops);
let ops1 = ops;
ops1.forEach(function(item, index) {
idtoindex[item.id] = index;
// console.dir(item);
item.outputsName.forEach(function(i, idx){
executed[i] = false;
executed[i] = true;
})
});
//ops[0].inputsName[0] = {name : "feed"};
// ops[0].outputsName[0] = {name : "image"};
this.execute_try(temp, ops, idtoindex, executed, inline, -1);
ops1.forEach(function(item, index) {
inIndex[index] = 0;
idtoindex[item.id] = index;
if (item.inputsName.length > 1) {
item.inputsName.forEach(function(i,idx){
if (executed[i] == true) inIndex[index]++;
})
}
else inIndex[index] = item.inputsName.length;
});
this.topoSort(ops, inIndex, idtoindex);
return ops;
}
checkifcanrun(temp, ops, executed){
if (!ops[temp].inputsName) return true;
for (let i = 0; i < ops[temp].inputsName.length; i++){
if (executed[ops[temp].inputsName[i]] === false) return false;
topoSort(ops, inIndex, idtoindex){
var inline = [];
inline.push(ops[0]);
let ops_temp = ops.slice(0);
let prev = null;
let a = ops[0];
while(inline.length > 0){
if (prev != null) ops[idtoindex[prev.id]].next = a.id;
prev = a;
a = inline.pop();
for (let i = 0; i < a.outputsName.length; i++){
for (let k = 0; k < ops_temp.length; k++){
for (let j = 0; j < ops_temp[k].inputsName.length; j++){
if (ops_temp[k].inputsName[j] == a.outputsName[i]) {
inIndex[idtoindex[ops_temp[k].id]]--;
if (inIndex[idtoindex[ops_temp[k].id]] == 0){
inline.push(ops[idtoindex[ops_temp[k].id]]);
ops_temp.splice(k,1);
k--;
break;
}
}
}
}
}
}
return true;
}
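The rewritten ordering is essentially Kahn's algorithm keyed on tensor names: arrangeMap seeds each op's in-degree from its inputsName, and topoSort pops zero-in-degree ops while chaining op.next. A standalone sketch of the same idea, assuming only the id / inputsName / outputsName fields used above (it returns an ordered array instead of rewriting next in place):
function topoOrder(ops) {
    const producedBy = {};                            // tensor name -> index of the producing op
    ops.forEach((op, i) => op.outputsName.forEach(name => { producedBy[name] = i; }));
    const indeg = ops.map(op => op.inputsName.filter(name => producedBy[name] !== undefined).length);
    const queue = ops.map((op, i) => i).filter(i => indeg[i] === 0);
    const order = [];
    while (queue.length > 0) {
        const i = queue.pop();
        order.push(ops[i]);
        ops[i].outputsName.forEach(name => {
            ops.forEach((op, j) => {
                if (op.inputsName.indexOf(name) !== -1 && --indeg[j] === 0) {
                    queue.push(j);
                }
            });
        });
    }
    return order;                                     // shorter than ops means the graph has a cycle
}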
/**
* Get Ops Nets Start Node
* @param ops
......@@ -348,8 +310,6 @@ export default class Graph {
* @returns {*}
*/
createOpsMap(ops) {
// console.log('ops!!');
// console.dir(ops);
return ops.map((item, idx) => {
item.idx = idx;
const graphExecutor = new GraphExecutor(item);
......@@ -372,17 +332,6 @@ export default class Graph {
});
}
getNextByOp(ops, op) {
return ops.filter((item, key) => {
for (let i = 0; i < item.inputsName.length; i++) {
for(let j = 0; j < op.outputsName.length; j++) {
if (item.inputsName[i] === op.outputsName[j]) {
return true;
}
}
}
});
}
/**
* dispose
*/
......
......@@ -109,8 +109,9 @@ export default class Loader {
const TMP_REGEX = /\-/;
let requesterArr = arr.map(item => {
if (item.name
&& item.name.match(TMP_SCHEME_REGEX) === null
&& item.name.match(TMP_REGEX) === null) {
// && item.name.match(TMP_SCHEME_REGEX) === null
// && item.name.match(TMP_REGEX) === null
) {
return this.fetchData(item.name).then(data => item.data = data);
}
return Promise.resolve();
......@@ -124,9 +125,9 @@ export default class Loader {
let marker = 0; // current read position
let len; // length of the current op
arr.filter(item => {
return item.name
&& item.name.match(TMP_SCHEME_REGEX) === null
&& item.name.match(TMP_REGEX) === null;
return item.name;
// && item.name.match(TMP_SCHEME_REGEX) === null
// && item.name.match(TMP_REGEX) === null;
})
// .sort((a, b) => {
// if (a.name > b.name) {
......
......@@ -48,13 +48,9 @@ export default class Paddle {
that.graph = graph;
that.graph.data = artifacts.data;
that.graph.formatWeight(that.graph.data.vars);
const opsMap = that.graph.createOpsMap(that.graph.data.ops, that.graph.data.vars);
const opsMap = that.graph.createOpsMap(that.graph.data.ops);
const opsMap1 = that.graph.constructOpsMap(opsMap);
// console.log('opsMap1!');
// console.dir(opsMap1);
const opsMap2 = that.graph.arrangeMap(opsMap1);
// console.log('opsMap2!');
// console.dir(opsMap2);
that.graph.weightMap = opsMap2;
}
/**
......@@ -68,10 +64,10 @@ export default class Paddle {
this.feed = this.graph.feed = inputs;
// build op data
if (!this.graph.isExecuted) {
this.graph.weightMap.forEach(op => {
this.graph.weightMap.forEach((op, index) => {
const type = op.type;
if (type !== 'feed' && type !== 'fetch') {
console.log(op.type);
that.graph.buildOpData(op);
}
});
......@@ -81,7 +77,6 @@ export default class Paddle {
}
updateFeed() {
this.graph.feedItem.data = this.graph.feed.input[0].data;
// Utils.img2texture(this.graph.feedItem);
}
/**
* dispose
......
......@@ -2,9 +2,6 @@
import Gpu from '../gpu/gpu';
import getMaxUniforms from '../test/getMaxUniforms';
import Factory from '../factory/fshader/factory';
// import {getTextureShapeInfo} from '../utils/opData';
// 生成factory实例
// const factory = new Factory({});
/**
* @file gpu runtime
* @author wangqun@baidu.com, yangmingming@baidu.com
......@@ -29,6 +26,14 @@ export default {
return this.gpu.getWebglVersion();
},
getWebglMaxTextureSize() {
return this.gpu.getWebglMaxTextureSize();
},
getWebglMaxTextureImageUnits() {
return this.gpu.getWebglMaxTextureImageUnits();
},
run(opName, opData, isRendered) {
// console.dir(['fscode', opData.fsCode]);
// let time = +Date.now();
......@@ -64,6 +69,7 @@ export default {
this.gpu.render(opData.renderData, opData.iLayer, isRendered);
// }
});
},
/**
......
......@@ -7,29 +7,38 @@
export default `
// fetch the value at a tensor position from its tensor coordinates
float getValueFromTensorPos_TENSOR_NAME(int r, int g, int b, int a) {
vec4 pixels = TEXTURE2D(texture_TENSOR_NAME,
vec4 pixels = TEXTURE2D(texture_TENSOR_NAME,
vec2(
(float(a * channel_TENSOR_NAME + g) + 0.5) / float(width_texture_TENSOR_NAME),
(float(a * channel_TENSOR_NAME + g) + 0.5) / float(width_texture_TENSOR_NAME),
(float(r * height_shape_TENSOR_NAME + b) + 0.5) / float(height_texture_TENSOR_NAME)
)
);
// only the r channel is used
return pixels.r;
}
// packed layout: fetch the value at a tensor position from its tensor coordinates
// oversize (limit) layout: fetch the value at a tensor position from its tensor coordinates
float getValueFromTensorPosLimit_TENSOR_NAME(int r, int g, int b, int a) {
float halfW = ceil(float(width_shape_TENSOR_NAME) / 2.0);
int x = int(mod(float(a), halfW));
float pieceW = ceil(float(width_shape_TENSOR_NAME) / 4.0);
int x = int(mod(float(a), pieceW));
int offsetY = 0;
if (a > x) {
if ((float(a) / pieceW) >= 3.0) {
offsetY = 3 * height_shape_TENSOR_NAME;
}
else if (float(a) / pieceW >= 2.0) {
offsetY = 2 * height_shape_TENSOR_NAME;
}
else if (float(a) >= pieceW) {
offsetY = height_shape_TENSOR_NAME;
}
vec4 pixels = TEXTURE2D(texture_TENSOR_NAME,
vec4 pixels = TEXTURE2D(texture_TENSOR_NAME,
vec2(
(float(x * channel_TENSOR_NAME + g) + 0.5) / float(width_texture_TENSOR_NAME),
(float(r * 2 * height_shape_TENSOR_NAME + b + offsetY) + 0.5) / float(height_texture_TENSOR_NAME)
(float(x * channel_TENSOR_NAME + g) + 0.5) / float(width_texture_TENSOR_NAME),
(float(r * 4 * height_shape_TENSOR_NAME + b + offsetY) + 0.5) / float(height_texture_TENSOR_NAME)
)
);
return pixels.r;
}
`;
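The same index math in JavaScript, for checking the oversize ("limit") layout by hand: a logical width index a is folded into four vertical bands, so the texture becomes roughly four times taller and a quarter as wide. The helper name limitTexCoord and the pos/shape/tex argument shapes are assumptions for illustration; the formulas mirror the shader above:
// Returns the normalized texture coordinate sampled for tensor position (r, g, b, a).
function limitTexCoord(pos, shape, tex) {
    const pieceW = Math.ceil(shape.width / 4);
    const x = pos.a % pieceW;
    const band = Math.min(Math.floor(pos.a / pieceW), 3); // which of the 4 bands
    const offsetY = band * shape.height;
    const u = (x * shape.channel + pos.g + 0.5) / tex.width;
    const v = (pos.r * 4 * shape.height + pos.b + offsetY + 0.5) / tex.height;
    return [u, v];
}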
......@@ -8,11 +8,13 @@ export default `
precision highp float;
precision highp int;
#else
precision mediump float;
precision mediump int;
precision highp float;
precision highp int;
#endif
varying vec2 vCoord;
varying vec4 outColor;
void setOutput(float result) {
gl_FragColor.r = result;
gl_FragColor.r = result;
}
`;
......@@ -15,17 +15,18 @@ ivec4 getOutputTensorPos() {
return ivec4(b, c, y, x);
}
ivec4 getOutputTensorPosLimit() {
// recover the original length
vec2 outCoord = vCoord.xy * _2d_shape_texture_out;
float offsetY = floor(outCoord.y / float(height_shape_out));
int x = int(outCoord.x / float(channel_out));
if (mod(offsetY, 2.0) > 0.0) {
x += int(ceil(float(width_shape_out) / 2.0));
if (mod(offsetY, 4.0) > 0.0) {
x += int(mod(offsetY, 4.0)) * int(ceil(float(width_shape_out) / 4.0));
}
int y = int(mod(outCoord.y, float(height_shape_out)));
int c = int(mod(outCoord.x, float(channel_out)));
int b = int(outCoord.y / float(2 * height_shape_out));
int b = int(outCoord.y / float(4 * height_shape_out));
return ivec4(b, c, y, x);
}
......
......@@ -10,13 +10,13 @@ export default `
ivec4 transferFromNHWCtoNCHW( int sumVal, const int channel, const int width_shape, const int height_shape, const int total_shape) {
int n_origin = int(total_shape/(channel * width_shape * height_shape));
int new_a = sumVal % width_shape;
int new_a = int(mod(float(sumVal), float(width_shape)));
sumVal = int((sumVal - new_a) / width_shape);
int new_b = sumVal % height_shape;
int new_b = int(mod(float(sumVal), float(height_shape)));
sumVal = int((sumVal - new_b) / height_shape);
int new_g = sumVal % channel;
int new_g = int(mod(float(sumVal), float(channel)));
sumVal = int((sumVal - new_g) / channel);
int new_r = sumVal % n_origin;
int new_r = int(mod(float(sumVal), float(n_origin)));
return ivec4(new_r,new_g,new_b,new_a);
}
`;
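mod() replaces % throughout because GLSL ES 1.0 (WebGL1) has no integer modulo. The same conversion in JavaScript, mirroring the helper above; the function name offsetToPos is illustrative:
// Decomposes a flat offset into (n, c, h, w), exactly as transferFromNHWCtoNCHW does.
function offsetToPos(sumVal, channel, width, height, total) {
    const nOrigin = Math.floor(total / (channel * width * height));
    const a = sumVal % width;   sumVal = (sumVal - a) / width;
    const b = sumVal % height;  sumVal = (sumVal - b) / height;
    const g = sumVal % channel; sumVal = (sumVal - g) / channel;
    const r = sumVal % nOrigin;
    return [r, g, b, a]; // (n, c, h, w)
}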
......@@ -7,14 +7,14 @@ export default `
// entry (start) function
void main(void) {
// output data
ivec4 oPos = getOutputTensorPos();
float o = getValueFromTensorPos_origin(oPos.r, oPos.g, oPos.b, oPos.a);
ivec4 oPos = getOutputTensorPosLIMIT_OUT();
float o = getValueFromTensorPosLIMIT_ORIGIN_origin(oPos.r, oPos.g, oPos.b, oPos.a);
// normalization data
vec4 scale = getPixelsFromTexturePos_texture_scale(vec2( float(oPos.g) / float(width_texture_scale), 0.0));
vec4 bias = getPixelsFromTexturePos_texture_bias(vec2((float(oPos.g)) / float(width_texture_bias), 0.0));
vec4 mean = getPixelsFromTexturePos_texture_mean(vec2((float(oPos.g)) / float(width_texture_mean), 0.0));
vec4 variance = getPixelsFromTexturePos_texture_variance(vec2((float(oPos.g)) / float(width_texture_variance), 0.0));
vec4 scale = getPixelsFromTexturePos_texture_scale(vec2((float(oPos.g) + 0.5) / float(width_texture_scale), 0.0));
vec4 bias = getPixelsFromTexturePos_texture_bias(vec2((float(oPos.g) + 0.5) / float(width_texture_bias), 0.0));
vec4 mean = getPixelsFromTexturePos_texture_mean(vec2((float(oPos.g) + 0.5) / float(width_texture_mean), 0.0));
vec4 variance = getPixelsFromTexturePos_texture_variance(vec2((float(oPos.g) + 0.5) / float(width_texture_variance), 0.0));
float x = (o - mean[0]) / sqrt(variance[0] + epsilon);
float res = scale[0] * x + bias[0];
......
/* eslint-disable */
/**
* @file bilinear_interp config file
* @author chenhaoze
*/
export default {
dep: [
{
func: 'getValueFromTensorPos',
conf: {
TENSOR_NAME: 'origin'
}
},
{
func: 'transferFromNHWCtoNCHW',
conf:{
}
}
],
conf: [
'WIDTH_SHAPE_ORIGIN',
'HEIGHT_SHAPE_ORIGIN',
'LENGTH_SHAPE_ORIGIN',
'WIDTH_TEXTURE_ORIGIN',
'HEIGHT_TEXTURE_ORIGIN',
'CHANNEL_ORIGIN',
'WIDTH_SHAPE_OUT',
'HEIGHT_SHAPE_OUT',
'WIDTH_TEXTURE_OUT',
'HEIGHT_TEXTURE_OUT',
'CHANNEL_OUT',
'OFFSET_Y_OUT',
'MULTI_VALUE',
'BIAS_VALUE',
'ACTIVE_FUNCTION'
],
input: [
{
tensor: 'origin',
variable: 'texture',
setter: 'initTexture',
type: 'texture'
}
]
};
/* eslint-disable */
/**
* @file bilinear_interp main function
* @author chenhaoze
*/
export default `
// entry (start) function
void main(void) {
// output data
ivec4 oPos = getOutputTensorPos();
// convert output coordinates to input coordinates
//int sumVal = oPos.g + oPos.a * channel_out + oPos.b * channel_out * width_shape_out + oPos.r * channel_out * width_shape_out * height_shape_out;
//oPos = transferFromNHWCtoNCHW(sumVal, channel_out, width_shape_out, height_shape_out, total_shape_out);
float o = getValueFromTensorPos_origin(oPos.r, oPos.g, oPos.b, oPos.a);
float scale_x = float(width_shape_out - 1) / float(width_shape_origin - 1);
float scale_y = float(height_shape_out - 1) / float(height_shape_origin - 1);
float x = float(oPos.a) / scale_x;
float y = float(oPos.b) / scale_y;
int x1 = int(floor(x));
int y1 = int(floor(y));
int x2 = int(ceil(x));
int y2 = int(ceil(y));
float dist_x = x - float(x1);
float dist_y = y - float(y1);
float value11 = getValueFromTensorPos_origin(oPos.r, oPos.g, y1, x1);
float value12 = getValueFromTensorPos_origin(oPos.r, oPos.g, y2, x1);
float value21 = getValueFromTensorPos_origin(oPos.r, oPos.g, y1, x2);
float value22 = getValueFromTensorPos_origin(oPos.r, oPos.g, y2, x2);
float value = (1.0 - dist_x) * (1.0 - dist_y) * value11 +
(1.0 - dist_x) * dist_y * value12 + dist_x * (1.0 - dist_y) * value21 +
dist_x * dist_y * value22;
setOutput(float(value));
}
`;
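For reference, the interpolation the shader performs for a single output location, written as plain JavaScript; getValue stands in for getValueFromTensorPos_origin and the function name bilinearAt is illustrative. The (size - 1) scale factors match the align-corners style scaling used above:
function bilinearAt(getValue, n, c, outY, outX, inH, inW, outH, outW) {
    const scaleX = (outW - 1) / (inW - 1);
    const scaleY = (outH - 1) / (inH - 1);
    const x = outX / scaleX;
    const y = outY / scaleY;
    const x1 = Math.floor(x), x2 = Math.ceil(x);
    const y1 = Math.floor(y), y2 = Math.ceil(y);
    const dx = x - x1, dy = y - y1;
    return (1 - dx) * (1 - dy) * getValue(n, c, y1, x1)
         + (1 - dx) * dy * getValue(n, c, y2, x1)
         + dx * (1 - dy) * getValue(n, c, y1, x2)
         + dx * dy * getValue(n, c, y2, x2);
}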
/* eslint-disable */
/**
* @file bilinear_interp params file
* @author chenhaoze
*/
export default `
// input data
const int width_shape_origin = WIDTH_SHAPE_ORIGIN;
const int height_shape_origin = HEIGHT_SHAPE_ORIGIN;
const int length_shape_origin = LENGTH_SHAPE_ORIGIN;
const int width_texture_origin = WIDTH_TEXTURE_ORIGIN;
const int height_texture_origin = HEIGHT_TEXTURE_ORIGIN;
const int channel_origin = CHANNEL_ORIGIN;
const int total_shape_origin = TOTAL_SHAPE_ORIGIN;
const int total_shape_out = TOTAL_SHAPE_OUT;
// input data
uniform sampler2D texture_origin;
`;
......@@ -11,8 +11,8 @@ void main(void) {
// int sumVal = oPos.g + oPos.a * channel_out + oPos.b * channel_out * width_shape_out + oPos.r * channel_out * width_shape_out * height_shape_out;
// ivec4 new_oPos = transferFromNHWCtoNCHW(sumVal, channel_out, width_shape_out, height_shape_out, total_shape_out);
float o = 0.0;
if (oPos[dim] > inputs_dim[0] - 1) {
oPos[dim] = oPos[dim] - inputs_dim[0];
if (oPos[dim] > inputs_dim - 1) {
oPos[dim] = oPos[dim] - inputs_dim;
o = getValueFromTensorPos_counter(oPos.r, oPos.g, oPos.b, oPos.a);
}
else {
......
......@@ -28,7 +28,7 @@ const int total_shape_origin = TOTAL_SHAPE_ORIGIN;
const int total_shape_out = TOTAL_SHAPE_OUT;
const int dim = DIM;
const int inputs_dim[1] = int[](INPUTS_DIM);
const int inputs_dim = INPUTS_DIM;
// uniform变量
......
......@@ -16,16 +16,8 @@ export default `
int temp_y = 0;
float o = 0.0;
float f = 0.0;
if (x % 2 == 1) x = x - 2;
if (y % 2 == 1) y = y - 2;
// reorder the traversal order
//int sumVal = oPos.g + oPos.a * channel_out + oPos.b * channel_out * width_shape_out;
//int new_a = sumVal % width_shape_out;
//int new_b = int((sumVal - new_a) / width_shape_out) % height_shape_out;
//int new_g = int((((sumVal - new_a) / width_shape_out) - new_b) / height_shape_out);
//int x = new_a;
//int c = new_g;
//int y = new_b;
if (int(mod(float(x), 2.0)) == 1) x = x - 2;
if (int(mod(float(y), 2.0)) == 1) y = y - 2;
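// int(mod(float(x), 2.0)) replaces x % 2: GLSL ES 1.0 (WebGL1) has no integer % operator.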
// get the output coordinates
int oTensorChannel = int(c * groups / channel_out) * channel_origin;
int oy = y;
......@@ -43,8 +35,7 @@ export default `
}
// per-channel computation
for (int j = 0; j < channel_origin; j++) {
if (ox % stride_h == 0 && oy % stride_v == 0) {
if (int(mod(float(ox), float(stride_h))) == 0 && int(mod(float(oy), float(stride_v))) == 0) {
temp_x = int(floor(float(ox) / float(stride_h)));
temp_y = int(floor(float(oy) / float(stride_v)));
if (temp_x < width_shape_origin && temp_y < height_shape_origin){
......
......@@ -7,22 +7,22 @@ export default `
// entry (start) function
void main(void) {
// output data
ivec4 oPos = getOutputTensorPos();
float o = getValueFromTensorPos_origin(oPos.r, oPos.g, oPos.b, oPos.a);
ivec4 oPos = getOutputTensorPosLIMIT_OUT();
float o = getValueFromTensorPosLIMIT_ORIGIN_origin(oPos.r, oPos.g, oPos.b, oPos.a);
ivec4 pos_counter;
float c = 0.0;
if (axis == 1){
c = getValueFromTensorPos_counter(0, oPos.r, oPos.g, oPos.b);
c = getValueFromTensorPosLIMIT_COUNTER_counter(0, oPos.r, oPos.g, oPos.b);
}
else if (axis == 2){
c = getValueFromTensorPos_counter(0, 0, oPos.r, oPos.g);
c = getValueFromTensorPosLIMIT_COUNTER_counter(0, 0, oPos.r, oPos.g);
}
else if (axis == 3){
c = getValueFromTensorPos_counter(0, 0, 0, oPos.r);
c = getValueFromTensorPosLIMIT_COUNTER_counter(0, 0, 0, oPos.r);
}
else {
c = getValueFromTensorPos_counter(oPos.r, oPos.g, oPos.b, oPos.a);
c = getValueFromTensorPosLIMIT_COUNTER_counter(oPos.r, oPos.g, oPos.b, oPos.a);
}
float res = c + o;
setOutput(float(res));
......
......@@ -7,10 +7,10 @@ export default `
void main(void) {
float res = 0.0;
// get the output coordinates
ivec4 out_pos = getOutputTensorPos();
ivec4 out_pos = getOutputTensorPosLIMIT_OUT();
for (int j = 0; j < width_shape_origin; j++) {
float c = getValueFromTensorPos_counter(out_pos[0], out_pos[1], j, out_pos[3]);
float o = getValueFromTensorPos_origin(out_pos[0], out_pos[1], out_pos[2], j);
float c = getValueFromTensorPosLIMIT_COUNTER_counter(out_pos[0], out_pos[1], j, out_pos[3]);
float o = getValueFromTensorPosLIMIT_COUNTER_origin(out_pos[0], out_pos[1], out_pos[2], j);
res += c * o;
}
setOutput(res);
......
......@@ -6,7 +6,7 @@
export default `
// entry (start) function
void main(void) {
int length = int(target_value.length() / num);
int length = int(target_length / num);
ivec4 oPos = getOutputTensorPos();
// convert output coordinates to input coordinates
//int sumVal = oPos.g + oPos.a * channel_out + oPos.b * channel_out * width_shape_out + oPos.r * channel_out * width_shape_out * height_shape_out;
......
......@@ -18,7 +18,7 @@ const int total_shape_out = TOTAL_SHAPE_OUT;
const int dim = DIM;
const int num = NUM;
const int target_value[TARGET_LENGTH] = int[](TARGET_VALUE);
const int target_length = TARGET_LENGTH;
// input data
......
......@@ -19,10 +19,10 @@ void main(void) {
o = getValueFromTensorPos_origin(oPos[0], oPos[1], oPos[2], oPos[3]);
}
else if (perm_size == 2) {
o = getValueFromTensorPos_origin(oPos[0], oPos[1], oPos[min(2 + perm_0, 3)], oPos[min(2 + perm_1, 3)]);
o = getValueFromTensorPos_origin(oPos[0], oPos[1], oPos[(2 + perm_0)>3?3:(2 + perm_0)], oPos[(2 + perm_1)>3?3:(2 + perm_1)]);
}
else if (perm_size == 3) {
o = getValueFromTensorPos_origin(oPos[0], oPos[min(1 + perm_0, 3)], oPos[min(1 + perm_1, 3)], oPos[min(1 + perm_2, 3)]);
o = getValueFromTensorPos_origin(oPos[0], oPos[(1 + perm_0)>3?3:(1 + perm_0)], oPos[(1 + perm_1)>3?3:(1 + perm_1)], oPos[(1 + perm_2)>3?3:(1 + perm_2)]);
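// The ternary replaces min(): GLSL ES 1.0 only defines min() for floats, not ints.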
}
else if (perm_size == 4) {
o = getValueFromTensorPos_origin(oPos[perm_0], oPos[perm_1], oPos[perm_2], oPos[perm_3]);
......
......@@ -125,7 +125,6 @@ const mergeType = 'conv2d-elementwise_add';
export default class OpData {
constructor(name, input = {}, output = {}, attrs = {}) {
// console.dir(this);
this.realName = name;
this.name = name;
this.attrs = attrs;
......@@ -205,7 +204,6 @@ export default class OpData {
// take the first entry by default
const data = this.output[key] || [{}];
if (tensorName[key.toLowerCase()]) {
// console.dir(this);
data.forEach(item => {
item.tensorName = tensorName[key.toLowerCase()];
tensorData.push(item);
......@@ -453,7 +451,7 @@ export default class OpData {
this.attrs.target_length = dim_value.length;
this.attrs.target_value = dim_value;
// store the length of the input tensor along dim
this.attrs.inputs_dim = [origin_shape[axis]];
this.attrs.inputs_dim = origin_shape[axis];
this.attrs.dim = 4 - origin_shape.length + axis;
}
......
......@@ -119,12 +119,13 @@ export default {
let offsetY = 0;
// max texture size on Android/iOS is 4096; restructure the storage as (2bh, cw / 2)
let exceedMax = false;
// FIXME: commented out for now so mobilenet can run, pending a proper fix
// if (height > MAX_TEXTURE_SIZE || width > MAX_TEXTURE_SIZE) {
// height *= 2;
// width = c * (Math.ceil(w / 2));
// exceedMax = true;
// }
// workaround for exceeding TEXTURE_SIZE; a better solution will follow in a later upgrade
if (height > 4096 || width > 4096) {
//console.error('size exceeds limit', shape);
//height *= 4;
//width = c * (Math.ceil(w / 4));
//exceedMax = true;
}
if (isPacked) {
// packed layout
height = b * c * Math.ceil(h / 2);
......@@ -203,7 +204,7 @@ export default {
return fourDimShape;
},
/*
/*
* convert NHWC-layout data to NCHW layout
*/
nhwc2nchw(data, shape) {
......@@ -226,7 +227,7 @@ export default {
return nchwData;
},
/*
/*
* convert NCHW-layout data to NHWC layout
*/
nchw2nhwc(data, shape) {
......@@ -249,9 +250,9 @@ export default {
return nhwcData;
},
/*
/*
* print data at evenly spaced intervals
*/
*/
stridePrint(data, count = 20) {
let realPrintCount = count;
if (data.length <= realPrintCount) {
......@@ -267,10 +268,10 @@ export default {
for (let i = 0; i < realPrintCount; i++) {
numbers.push(i * stride + ": " + data[i * stride]);
}
console.log(numbers)
console.log(numbers);
},
/*
/*
* print data contiguously
*/
continuousPrint(data, count = 100) {
......@@ -282,7 +283,7 @@ export default {
for (let i = 0; i < realPrintCount; i++) {
numbers.push(i + ": " + data[i]);
}
console.log(numbers)
console.log(numbers);
},
softmax(nchwData) {
......@@ -306,6 +307,44 @@ export default {
}
return result;
},
// handles the case where the model's final texture exceeds the size limit and inst.read returns misordered data
formatReadData(nchwData, nchwShape) {
if (nchwShape.length < 4) {
let batch = [];
for (let i = 0; i < (4 - nchwShape.length); i++) {
batch.push(1);
}
nchwShape = batch.concat(nchwShape);
}
const shape_b = nchwShape[0];
const shape_c = nchwShape[1];
const shape_h = nchwShape[2];
const shape_w = nchwShape[3];
const texture_height = shape_b * shape_h;
const texture_width = shape_c * shape_w;
if (texture_height <= 4096 && texture_width <= 4096) {
return nchwData;
}
let pos = 0;
const formatData = [];
const pieceW = Math.ceil(shape_w / 4); // shape_width after the reshape
for (let bIndex = 0; bIndex < shape_b; bIndex++) {
for (let cIndex = 0; cIndex < shape_c; cIndex++) {
for (let hIndex = 0; hIndex < shape_h; hIndex++) {
for (let wIndex = 0; wIndex < shape_w; wIndex++) {
pos = Math.floor(wIndex / pieceW) * pieceW * (shape_h - 1) + wIndex + hIndex * pieceW;
pos += bIndex * shape_c * shape_h * shape_w+ cIndex * shape_h * shape_w;
formatData.push(nchwData[pos]);
}
}
}
}
return formatData;
}
};
/* eslint-enable */
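A hedged usage note: the unit test below already runs the read-back through this helper; a minimal call looks like the following, where the shape is illustrative and chosen so that c*w exceeds 4096 (nchwResult is the NHWC→NCHW converted read-back, as in the test):
// Texture for this output would be (b*h) x (c*w) = 192 x 8192, so the 4-way split
// layout was used on the GPU and formatReadData restores plain NCHW order.
const plainNCHW = Utils.formatReadData(nchwResult, [1, 32, 192, 256]);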
This diff is collapsed.
......@@ -126,7 +126,6 @@
2,
2
]
},
}
]
}
......@@ -22,7 +22,7 @@ const unitPath = {
'split': 'model.test.split.json'
};
// specify the op to run
const modelType = 'split';
const modelType = 'conv2d';
// specify the op to run
const unitData = unitPath[modelType];
......@@ -63,13 +63,13 @@ async function run() {
// get the NHWC -> NCHW output
const outputNCHWShape = getOutputShape();
const outputNHWCShape = nchwShape2nhwcShape(outputNCHWShape);
let nchwResult = Utils.nhwc2nchw(result, outputNHWCShape);
console.log('result');
console.log(result);
let nchwResult = Utils.nhwc2nchw(result, outputNHWCShape);
const formatData = Utils.formatReadData(nchwResult, outputNCHWShape);
console.log('NCHW RESULT');
console.log(nchwResult);
console.log(formatData);
}
run();
......@@ -112,6 +112,7 @@ function nchwShape2nhwcShape(nchw) {
}
batchNCHW = batch.concat(nchw);
}
const N = batchNCHW[0];
const C = batchNCHW[1];
const H = batchNCHW[2];
......