Commit 63120052 authored by chenhaoze

compatible with WebGL 1.0; rewrite the op sorting algorithm; fix bugs

Parent 6c40834f
......@@ -151,8 +151,6 @@ export default class imageFeed {
}
}
}
console.log('this is the end of reshapetorgb !!!');
console.dir(result);
return result;
};
......@@ -164,7 +162,6 @@ export default class imageFeed {
* @return {Object} the resized dimensions
*/
reSize(image, params) {
console.log('execute resize!!');
// original image width and height
const width = this.pixelWidth;
const height = this.pixelHeight;
......@@ -192,7 +189,6 @@ export default class imageFeed {
* scale the image, fit it to the target size and center it
*/
resizeAndFitTargetSize(image, params){
console.log('execute resizeAndFitTargetSize!!');
// original image width and height
const width = this.pixelWidth;
const height = this.pixelHeight;
......@@ -249,7 +245,6 @@ export default class imageFeed {
sh = Math.round(sw * this.pixelHeight / this.pixelWidth);
y = Math.floor((targetHeight - sh) / 2);
}
// console.log(x, y, sw, sh);
if (center) {
this.fromPixels2DContext.drawImage(
image, x, y, sw, sh);
......@@ -327,24 +322,18 @@ export default class imageFeed {
data = this.resizeAndFitTargetSize(pixels, opt);
data2 = this.fromPixels2DContext2.getImageData(0, 0, this.pixelWidth, this.pixelHeight);
}
else if (opt.scale) { // backward compatible: with scale set, the short edge is resized to scale
else if (opt.scale) { // resize directly to targetShape (the HumanSeg case)
scaleSize = this.reSize(pixels, opt);
console.dir(scaleSize);
console.dir(pixels);
data = this.getImageData(opt, 0, 0, scaleSize);
data2 = this.fromPixels2DContext2.getImageData(0, 0, this.pixelWidth, this.pixelHeight);
}
else if (opt.targetSize) { // with targetSize set, fit the image into the target width/height (the TinyYolo case)
scaleSize = this.fitToTargetSize(pixels, opt);
data = this.getImageData(opt, 0, 0, scaleSize);
data2 = this.fromPixels2DContext2.getImageData(0, 0, this.pixelWidth, this.pixelHeight);
}
}
if (opt.gray) {
data = grayscale(data);
}
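A minimal sketch of the two feed modes handled above, assuming option objects shaped the way the branches suggest; the concrete numbers, the width/height form of targetSize, and the variable names humansegOpt / tinyYoloOpt are illustrative only:
// scale path: resize directly to targetShape (HumanSeg-style)
const humansegOpt = {scale: 192, targetShape: [1, 3, 192, 192]};
// targetSize path: fit the image into a fixed box, then crop/center (TinyYolo-style)
const tinyYoloOpt = {targetSize: {width: 320, height: 320}, targetShape: [1, 3, 320, 320]};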
......@@ -359,6 +348,7 @@ export default class imageFeed {
else if (opt.targetShape) {
data = this.allReshapeToRGB(data, opt, scaleSize);
}
return [{data: data, shape: opt.shape || opt.targetShape, name: 'image', canvas: data2}];
}
}
......
......@@ -51,6 +51,10 @@ export default class gpu {
console.log('float extension is started or not? ' + !!this.textureFloat);
}
}
this.maxTextureSize = gl.getParameter(gl.MAX_TEXTURE_SIZE);
this.maxTextureImageUnits = gl.getParameter(gl.MAX_TEXTURE_IMAGE_UNITS);
// disable unneeded features
gl.disable(gl.DEPTH_TEST);
gl.disable(gl.STENCIL_TEST);
......@@ -67,14 +71,22 @@ export default class gpu {
this.waits = 0;
console.log('WebGL version is ' + this.version);
console.log('MAX_TEXTURE_SIZE is ' + gl.getParameter(gl.MAX_TEXTURE_SIZE));
console.log('MAX_TEXTURE_IMAGE_UNITS is ' + gl.getParameter(gl.MAX_TEXTURE_IMAGE_UNITS));
console.log('MAX_TEXTURE_SIZE is ' + this.maxTextureSize);
console.log('MAX_TEXTURE_IMAGE_UNITS is ' + this.maxTextureImageUnits);
}
getWebglVersion() {
return this.version;
}
getWebglMaxTextureSize() {
return this.maxTextureSize;
}
getWebglMaxTextureImageUnits() {
return this.maxTextureImageUnits;
}
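A minimal usage sketch for the new getters, assuming the (b*h) x (c*w) texture mapping used by formatReadData further down; the helper name exceedsTextureLimit is illustrative:
function exceedsTextureLimit(gpu, shape) {
    const [b, c, h, w] = shape;               // NCHW
    const max = gpu.getWebglMaxTextureSize(); // e.g. 4096 on many mobile GPUs
    return b * h > max || c * w > max;
}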
initCache() {
// run count
this.times = 0;
......@@ -145,7 +157,6 @@ export default class gpu {
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE);
gl.texImage2D(gl.TEXTURE_2D, // Target, matches bind above.
0, // Level of detail.
this.downloadInternalFormat, // Internal format.
......@@ -346,6 +357,7 @@ export default class gpu {
} else {
// texture = gl.createTexture();
if (isRendered && (iLayer > 0 || (iLayer === 0 && item.tensor !== 'origin'))) {
const tData = this.cacheTextures['' + iLayer];
texture = tData[item.variable + '_' + item.tensor];
} else {
......@@ -361,6 +373,7 @@ export default class gpu {
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE);
if (this.version == 2){
gl.texImage2D(gl.TEXTURE_2D,
0,
this.internalFormat,
......@@ -369,8 +382,27 @@ export default class gpu {
0,
this.textureFormat,
gl.FLOAT,
item.data,
0);
item.data);
}
else {
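// WebGL1 fallback: no single-channel float upload, so each value is expanded
// into the R channel of an RGBA texel (G/B/A are padded with zeros).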
let oneSize = item.width_texture * item.height_texture;
let temp = new Float32Array(item.width_texture * item.height_texture * 4);
for (let i = 0; i < item.data.length; i++){
temp[i*4] = (item.data[i]);
temp[i*4+1] = 0;
temp[i*4+2] = 0;
temp[i*4+3] = 0;
}
gl.texImage2D(gl.TEXTURE_2D,
0,
gl.RGBA,
item.width_texture,
item.height_texture,
0,
gl.RGBA,
gl.FLOAT,
temp);
}
}
}
......@@ -389,7 +421,7 @@ export default class gpu {
// create the texture backing the framebuffer
makeTexure(type, data, opts = {}) {
const gl = this.gl;
let index = this.textureBufferIndex % 2;
let index = int(mod(float(this.textureBufferIndex), 2.0));
let texture = this.textureBuffer[index];
gl.bindTexture(gl.TEXTURE_2D, texture);
......@@ -429,6 +461,7 @@ export default class gpu {
}
createPBO() {
if (this.version == 2){
const gl2 = this.gl;
const buffer = this.pbo;
gl2.bindBuffer(gl2.PIXEL_PACK_BUFFER, buffer);
......@@ -437,42 +470,40 @@ export default class gpu {
gl2.readPixels(0, 0, this.width_texture_out, this.height_texture_out, gl2.RGBA, gl2.FLOAT, 0);
gl2.bindBuffer(gl2.PIXEL_PACK_BUFFER, null);
return buffer;
}
else {
let buffer = new Float32Array(this.width_texture_out * this.height_texture_out * 4);
const gl2 = this.gl;
gl2.readPixels(0, 0, this.width_texture_out, this.height_texture_out, gl2.RGBA, gl2.FLOAT, buffer);
return buffer;
}
}
downloadFoat32TensorFromBuffer(buffer) {
const gl2 = this.gl;
const size = 4 * this.width_texture_out * this.height_texture_out;
if (this.version == 2){
const pixels = new Float32Array(size);
gl2.bindBuffer(gl2.PIXEL_PACK_BUFFER, buffer);
gl2.getBufferSubData(gl2.PIXEL_PACK_BUFFER, 0, pixels);
gl2.bindBuffer(gl2.PIXEL_PACK_BUFFER, null);
// log.start('postprocess-readloop');
// let result = [];
// let offset = 0;
// for (let h = 0; h < this.height_texture_out; h++) {
// // record rows 1 and 2 of the data
// let temp1 = [];
// let temp2 = [];
// for (let w = 0; w < this.width_texture_out; w++) {
// temp1.push(pixels[offset]);
// temp1.push(pixels[offset + 1]);
// temp2.push(pixels[offset + 2]);
// temp2.push(pixels[offset + 3]);
// offset += 4;
// }
// result = result.concat(temp1);
// result = result.concat(temp2);
// }
let result = [];
for (let i = 0; i < this.width_texture_out * this.height_texture_out; i++) {
result.push(pixels[4 * i]);
}
// const result = Array.prototype.slice.call(pixels);
// console.dir(['result', result]);
// log.end('postprocess-readloop');
return result;
}
else {
let pixels = buffer;
let result = [];
for (let i = 0; i < this.width_texture_out * this.height_texture_out; i++) {
result.push(pixels[4 * i]);
}
return result;
}
}
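A hedged usage sketch of the two readback paths above (createPBO and downloadFoat32TensorFromBuffer are the methods defined here; the gpu instance and the preceding render are assumed):
// WebGL2: buffer is a PIXEL_PACK_BUFFER handle; WebGL1: buffer is already a Float32Array.
const buffer = gpu.createPBO();
// Either way, only the R channel of every RGBA texel is kept (pixels[4 * i]).
const tensor = gpu.downloadFoat32TensorFromBuffer(buffer);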
getWebglError(status) {
const gl2 = this.gl;
switch (status) {
......@@ -497,7 +528,7 @@ export default class gpu {
createAndWaitForFence() {
const gl2 = this.gl;
const isFenceEnabled = (gl2.fenceSync !== null);
const isFenceEnabled = (gl2.fenceSync != null);
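// Loose != is deliberate: WebGL1 contexts have no fenceSync at all, so the property is undefined rather than null.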
let isFencePassed = () => true;
if (isFenceEnabled) {
const sync = gl2.fenceSync(gl2.SYNC_GPU_COMMANDS_COMPLETE, 0);
......@@ -531,10 +562,8 @@ export default class gpu {
let pixels = new Float32Array(this.width_texture_out * this.height_texture_out * 4);
// gl.pixelStorei(gl.UNPACK_ALIGNMENT, 1);
const tt2 = +Date.now();
gl.readPixels(0, 0, this.width_texture_out, this.height_texture_out, gl.RGBA, gl.FLOAT, pixels, 0);
gl.readPixels(0, 0, this.width_texture_out, this.height_texture_out, gl.RGBA, gl.FLOAT, pixels);
// console.log('read time this pass: ' + (+Date.now() - tt2) + ',' + (tt2 - tt));
// log.end('后处理-readinside');
// log.start('后处理-readloop');
let result = [];
for (let i = 0; i < this.width_texture_out * this.height_texture_out; i++) {
result.push(pixels[4 * i]);
......
/* eslint-disable */
import GraphExecutor from '../executor/executor';
import IO from '../feed/imageFeed';
import Runtime from '../runtime/runtime';
import OpData from '../utils/opData';
import Factory from '../factory/fshader/factory';
......@@ -92,9 +91,14 @@ export default class Graph {
return;
}
opindex++;
// console.log(opindex);
//if (executor.opData) console.log(executor.opData.iLayer);
executor.execute(this.inst, this.isExecuted);
if (false && executor.opData && opindex >= 184){
console.log('return!');
console.dir(executor);
console.dir(executor.type);
console.dir(this);
return;
}
if (executor.next) {
const id = executor.next;
const next = this.getTensor(id);
......@@ -199,105 +203,63 @@ export default class Graph {
});
}
execute_try(temp, ops, idtoindex, executed, inline, prev){
console.log('execute_try!first look at this op');
console.log(ops[temp]);
let canrun = this.checkifcanrun(temp, ops, idtoindex, executed);
if (canrun === false) {
// console.log('canrun === false!');
var a = inline.pop();
this.execute_try(idtoindex[a.id], ops, idtoindex, executed, inline, prev);
return;
}
if (prev >=0) {
ops[prev].next = ops[temp].id;
}
ops[temp].outputsName.forEach(function(item, index) {
executed[item] = true;
})
let next = this.getNextByOp(ops, ops[temp]);
// console.log('this is its next:');
// console.dir(next);
while (next.length === 1) {
let flag = true;
for (let i = 0; i < next[0].inputsName.length; i++){
if (executed[next[0].inputsName[i]] === false) flag = false;
}
if (flag === false) {
// console.log('can not execute next now! jump to another op:');
if (inline.length === 0) return;
prev = temp;
let a = inline.pop();
// console.dir(a);
ops[temp].next = a.id;
temp = idtoindex[a.id];
this.execute_try(temp, ops, idtoindex, executed, inline, prev);
return;
}
else {
// console.log('now execute next op! it is');
ops[temp].next = next[0].id;
temp = idtoindex[next[0].id];
// console.dir(ops[temp]);
next = this.getNextByOp(ops, ops[temp]);
// console.log('its next is: ');
ops[temp].outputsName.forEach(function(item, index) {
executed[item] = true;
})
// console.dir(next);
}
}
if (next.length > 1){
// console.log('next.length > 1!!!');
for (let i = next.length - 1; i >=0 ; i--){
inline.push(next[i]);
}
var a = inline.pop();
this.execute_try(idtoindex[a.id], ops, idtoindex, executed, inline, temp);
}
return;
}
arrangeMap(ops) {
// console.log('arrangeMap!');
// console.dir(ops);
var idtoindex = {};
var executed = {};
var inline = [];
var inIndex = [];
var idtoindex = {};
let temp = 0;
// console.log('graph ops:');
// console.dir(ops);
let ops1 = ops;
ops1.forEach(function(item, index) {
idtoindex[item.id] = index;
// console.dir(item);
item.outputsName.forEach(function(i, idx){
executed[i] = false;
executed[i] = true;
})
});
//ops[0].inputsName[0] = {name : "feed"};
// ops[0].outputsName[0] = {name : "image"};
this.execute_try(temp, ops, idtoindex, executed, inline, -1);
ops1.forEach(function(item, index) {
inIndex[index] = 0;
idtoindex[item.id] = index;
if (item.inputsName.length > 1) {
item.inputsName.forEach(function(i,idx){
if (executed[i] == true) inIndex[index]++;
})
}
else inIndex[index] = item.inputsName.length;
});
this.topoSort(ops, inIndex, idtoindex);
return ops;
}
checkifcanrun(temp, ops, executed){
if (!ops[temp].inputsName) return true;
for (let i = 0; i < ops[temp].inputsName.length; i++){
if (executed[ops[temp].inputsName[i]] === false) return false;
topoSort(ops, inIndex, idtoindex){
var inline = [];
inline.push(ops[0]);
let ops_temp = ops.slice(0);
let prev = null;
let a = ops[0];
while(inline.length > 0){
if (prev != null) ops[idtoindex[prev.id]].next = a.id;
prev = a;
a = inline.pop();
for (let i = 0; i < a.outputsName.length; i++){
for (let k = 0; k < ops_temp.length; k++){
for (let j = 0; j < ops_temp[k].inputsName.length; j++){
if (ops_temp[k].inputsName[j] == a.outputsName[i]) {
inIndex[idtoindex[ops_temp[k].id]]--;
if (inIndex[idtoindex[ops_temp[k].id]] == 0){
inline.push(ops[idtoindex[ops_temp[k].id]]);
ops_temp.splice(k,1);
k--;
break;
}
}
}
}
}
}
return true;
}
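The rewritten ordering is essentially Kahn's algorithm keyed on tensor names: arrangeMap seeds each op's in-degree from its inputsName, and topoSort pops zero-in-degree ops while chaining op.next. A standalone sketch of the same idea, assuming only the id / inputsName / outputsName fields used above (it returns an ordered array instead of rewriting next in place):
function topoOrder(ops) {
    const producedBy = {};                            // tensor name -> index of the producing op
    ops.forEach((op, i) => op.outputsName.forEach(name => { producedBy[name] = i; }));
    const indeg = ops.map(op => op.inputsName.filter(name => producedBy[name] !== undefined).length);
    const queue = ops.map((op, i) => i).filter(i => indeg[i] === 0);
    const order = [];
    while (queue.length > 0) {
        const i = queue.pop();
        order.push(ops[i]);
        ops[i].outputsName.forEach(name => {
            ops.forEach((op, j) => {
                if (op.inputsName.indexOf(name) !== -1 && --indeg[j] === 0) {
                    queue.push(j);
                }
            });
        });
    }
    return order;                                     // shorter than ops means the graph has a cycle
}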
/**
* Get Ops Nets Start Node
* @param ops
......@@ -348,8 +310,6 @@ export default class Graph {
* @returns {*}
*/
createOpsMap(ops) {
// console.log('ops!!');
// console.dir(ops);
return ops.map((item, idx) => {
item.idx = idx;
const graphExecutor = new GraphExecutor(item);
......@@ -372,17 +332,6 @@ export default class Graph {
});
}
getNextByOp(ops, op) {
return ops.filter((item, key) => {
for (let i = 0; i < item.inputsName.length; i++) {
for(let j = 0; j < op.outputsName.length; j++) {
if (item.inputsName[i] === op.outputsName[j]) {
return true;
}
}
}
});
}
/**
* dispose
*/
......
......@@ -109,8 +109,9 @@ export default class Loader {
const TMP_REGEX = /\-/;
let requesterArr = arr.map(item => {
if (item.name
&& item.name.match(TMP_SCHEME_REGEX) === null
&& item.name.match(TMP_REGEX) === null) {
// && item.name.match(TMP_SCHEME_REGEX) === null
// && item.name.match(TMP_REGEX) === null
) {
return this.fetchData(item.name).then(data => item.data = data);
}
return Promise.resolve();
......@@ -124,9 +125,9 @@ export default class Loader {
let marker = 0; // current read position
let len; // length of the current op
arr.filter(item => {
return item.name
&& item.name.match(TMP_SCHEME_REGEX) === null
&& item.name.match(TMP_REGEX) === null;
return item.name;
// && item.name.match(TMP_SCHEME_REGEX) === null
// && item.name.match(TMP_REGEX) === null;
})
// .sort((a, b) => {
// if (a.name > b.name) {
......
......@@ -48,13 +48,9 @@ export default class Paddle {
that.graph = graph;
that.graph.data = artifacts.data;
that.graph.formatWeight(that.graph.data.vars);
const opsMap = that.graph.createOpsMap(that.graph.data.ops, that.graph.data.vars);
const opsMap = that.graph.createOpsMap(that.graph.data.ops);
const opsMap1 = that.graph.constructOpsMap(opsMap);
// console.log('opsMap1!');
// console.dir(opsMap1);
const opsMap2 = that.graph.arrangeMap(opsMap1);
// console.log('opsMap2!');
// console.dir(opsMap2);
that.graph.weightMap = opsMap2;
}
/**
......@@ -68,10 +64,10 @@ export default class Paddle {
this.feed = this.graph.feed = inputs;
// build op data
if (!this.graph.isExecuted) {
this.graph.weightMap.forEach(op => {
this.graph.weightMap.forEach((op, index) => {
const type = op.type;
if (type !== 'feed' && type !== 'fetch') {
console.log(op.type);
that.graph.buildOpData(op);
}
});
......@@ -81,7 +77,6 @@ export default class Paddle {
}
updateFeed() {
this.graph.feedItem.data = this.graph.feed.input[0].data;
// Utils.img2texture(this.graph.feedItem);
}
/**
* dispose
......
......@@ -2,9 +2,6 @@
import Gpu from '../gpu/gpu';
import getMaxUniforms from '../test/getMaxUniforms';
import Factory from '../factory/fshader/factory';
// import {getTextureShapeInfo} from '../utils/opData';
// 生成factory实例
// const factory = new Factory({});
/**
* @file gpu runtime
* @author wangqun@baidu.com, yangmingming@baidu.com
......@@ -29,6 +26,14 @@ export default {
return this.gpu.getWebglVersion();
},
getWebglMaxTextureSize() {
return this.gpu.getWebglMaxTextureSize();
},
getWebglMaxTextureImageUnits() {
return this.gpu.getWebglMaxTextureImageUnits();
},
run(opName, opData, isRendered) {
// console.dir(['fscode', opData.fsCode]);
// let time = +Date.now();
......@@ -64,6 +69,7 @@ export default {
this.gpu.render(opData.renderData, opData.iLayer, isRendered);
// }
});
},
/**
......
......@@ -7,29 +7,38 @@
export default `
// fetch the value at a tensor position from its tensor coordinates
float getValueFromTensorPos_TENSOR_NAME(int r, int g, int b, int a) {
vec4 pixels = TEXTURE2D(texture_TENSOR_NAME,
vec4 pixels = TEXTURE2D(texture_TENSOR_NAME,
vec2(
(float(a * channel_TENSOR_NAME + g) + 0.5) / float(width_texture_TENSOR_NAME),
(float(a * channel_TENSOR_NAME + g) + 0.5) / float(width_texture_TENSOR_NAME),
(float(r * height_shape_TENSOR_NAME + b) + 0.5) / float(height_texture_TENSOR_NAME)
)
);
// only the r channel is used
return pixels.r;
}
// packed layout: fetch the value at a tensor position from its tensor coordinates
// oversize (limit) layout: fetch the value at a tensor position from its tensor coordinates
float getValueFromTensorPosLimit_TENSOR_NAME(int r, int g, int b, int a) {
float halfW = ceil(float(width_shape_TENSOR_NAME) / 2.0);
int x = int(mod(float(a), halfW));
float pieceW = ceil(float(width_shape_TENSOR_NAME) / 4.0);
int x = int(mod(float(a), pieceW));
int offsetY = 0;
if (a > x) {
if ((float(a) / pieceW) >= 3.0) {
offsetY = 3 * height_shape_TENSOR_NAME;
}
else if (float(a) / pieceW >= 2.0) {
offsetY = 2 * height_shape_TENSOR_NAME;
}
else if (float(a) >= pieceW) {
offsetY = height_shape_TENSOR_NAME;
}
vec4 pixels = TEXTURE2D(texture_TENSOR_NAME,
vec4 pixels = TEXTURE2D(texture_TENSOR_NAME,
vec2(
(float(x * channel_TENSOR_NAME + g) + 0.5) / float(width_texture_TENSOR_NAME),
(float(r * 2 * height_shape_TENSOR_NAME + b + offsetY) + 0.5) / float(height_texture_TENSOR_NAME)
(float(x * channel_TENSOR_NAME + g) + 0.5) / float(width_texture_TENSOR_NAME),
(float(r * 4 * height_shape_TENSOR_NAME + b + offsetY) + 0.5) / float(height_texture_TENSOR_NAME)
)
);
return pixels.r;
}
`;
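The same index math in JavaScript, for checking the oversize ("limit") layout by hand: a logical width index a is folded into four vertical bands, so the texture becomes roughly four times taller and a quarter as wide. The helper name limitTexCoord and the pos/shape/tex argument shapes are assumptions for illustration; the formulas mirror the shader above:
// Returns the normalized texture coordinate sampled for tensor position (r, g, b, a).
function limitTexCoord(pos, shape, tex) {
    const pieceW = Math.ceil(shape.width / 4);
    const x = pos.a % pieceW;
    const band = Math.min(Math.floor(pos.a / pieceW), 3); // which of the 4 bands
    const offsetY = band * shape.height;
    const u = (x * shape.channel + pos.g + 0.5) / tex.width;
    const v = (pos.r * 4 * shape.height + pos.b + offsetY + 0.5) / tex.height;
    return [u, v];
}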
......@@ -8,11 +8,13 @@ export default `
precision highp float;
precision highp int;
#else
precision mediump float;
precision mediump int;
precision highp float;
precision highp int;
#endif
varying vec2 vCoord;
varying vec4 outColor;
void setOutput(float result) {
gl_FragColor.r = result;
gl_FragColor.r = result;
}
`;
......@@ -15,17 +15,18 @@ ivec4 getOutputTensorPos() {
return ivec4(b, c, y, x);
}
ivec4 getOutputTensorPosLimit() {
// recover the original length
vec2 outCoord = vCoord.xy * _2d_shape_texture_out;
float offsetY = floor(outCoord.y / float(height_shape_out));
int x = int(outCoord.x / float(channel_out));
if (mod(offsetY, 2.0) > 0.0) {
x += int(ceil(float(width_shape_out) / 2.0));
if (mod(offsetY, 4.0) > 0.0) {
x += int(mod(offsetY, 4.0)) * int(ceil(float(width_shape_out) / 4.0));
}
int y = int(mod(outCoord.y, float(height_shape_out)));
int c = int(mod(outCoord.x, float(channel_out)));
int b = int(outCoord.y / float(2 * height_shape_out));
int b = int(outCoord.y / float(4 * height_shape_out));
return ivec4(b, c, y, x);
}
......
......@@ -10,13 +10,13 @@ export default `
ivec4 transferFromNHWCtoNCHW( int sumVal, const int channel, const int width_shape, const int height_shape, const int total_shape) {
int n_origin = int(total_shape/(channel * width_shape * height_shape));
int new_a = sumVal % width_shape;
int new_a = int(mod(float(sumVal), float(width_shape)));
sumVal = int((sumVal - new_a) / width_shape);
int new_b = sumVal % height_shape;
int new_b = int(mod(float(sumVal), float(height_shape)));
sumVal = int((sumVal - new_b) / height_shape);
int new_g = sumVal % channel;
int new_g = int(mod(float(sumVal), float(channel)));
sumVal = int((sumVal - new_g) / channel);
int new_r = sumVal % n_origin;
int new_r = int(mod(float(sumVal), float(n_origin)));
return ivec4(new_r,new_g,new_b,new_a);
}
`;
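mod() replaces % throughout because GLSL ES 1.0 (WebGL1) has no integer modulo. The same conversion in JavaScript, mirroring the helper above; the function name offsetToPos is illustrative:
// Decomposes a flat offset into (n, c, h, w), exactly as transferFromNHWCtoNCHW does.
function offsetToPos(sumVal, channel, width, height, total) {
    const nOrigin = Math.floor(total / (channel * width * height));
    const a = sumVal % width;   sumVal = (sumVal - a) / width;
    const b = sumVal % height;  sumVal = (sumVal - b) / height;
    const g = sumVal % channel; sumVal = (sumVal - g) / channel;
    const r = sumVal % nOrigin;
    return [r, g, b, a]; // (n, c, h, w)
}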
......@@ -7,14 +7,14 @@ export default `
// entry (start) function
void main(void) {
// output data
ivec4 oPos = getOutputTensorPos();
float o = getValueFromTensorPos_origin(oPos.r, oPos.g, oPos.b, oPos.a);
ivec4 oPos = getOutputTensorPosLIMIT_OUT();
float o = getValueFromTensorPosLIMIT_ORIGIN_origin(oPos.r, oPos.g, oPos.b, oPos.a);
// normalization data
vec4 scale = getPixelsFromTexturePos_texture_scale(vec2( float(oPos.g) / float(width_texture_scale), 0.0));
vec4 bias = getPixelsFromTexturePos_texture_bias(vec2((float(oPos.g)) / float(width_texture_bias), 0.0));
vec4 mean = getPixelsFromTexturePos_texture_mean(vec2((float(oPos.g)) / float(width_texture_mean), 0.0));
vec4 variance = getPixelsFromTexturePos_texture_variance(vec2((float(oPos.g)) / float(width_texture_variance), 0.0));
vec4 scale = getPixelsFromTexturePos_texture_scale(vec2((float(oPos.g) + 0.5) / float(width_texture_scale), 0.0));
vec4 bias = getPixelsFromTexturePos_texture_bias(vec2((float(oPos.g) + 0.5) / float(width_texture_bias), 0.0));
vec4 mean = getPixelsFromTexturePos_texture_mean(vec2((float(oPos.g) + 0.5) / float(width_texture_mean), 0.0));
vec4 variance = getPixelsFromTexturePos_texture_variance(vec2((float(oPos.g) + 0.5) / float(width_texture_variance), 0.0));
float x = (o - mean[0]) / sqrt(variance[0] + epsilon);
float res = scale[0] * x + bias[0];
......
/* eslint-disable */
/**
* @file bilinear_interp config file
* @author chenhaoze
*/
export default {
dep: [
{
func: 'getValueFromTensorPos',
conf: {
TENSOR_NAME: 'origin'
}
},
{
func: 'transferFromNHWCtoNCHW',
conf:{
}
}
],
conf: [
'WIDTH_SHAPE_ORIGIN',
'HEIGHT_SHAPE_ORIGIN',
'LENGTH_SHAPE_ORIGIN',
'WIDTH_TEXTURE_ORIGIN',
'HEIGHT_TEXTURE_ORIGIN',
'CHANNEL_ORIGIN',
'WIDTH_SHAPE_OUT',
'HEIGHT_SHAPE_OUT',
'WIDTH_TEXTURE_OUT',
'HEIGHT_TEXTURE_OUT',
'CHANNEL_OUT',
'OFFSET_Y_OUT',
'MULTI_VALUE',
'BIAS_VALUE',
'ACTIVE_FUNCTION'
],
input: [
{
tensor: 'origin',
variable: 'texture',
setter: 'initTexture',
type: 'texture'
}
]
};
/* eslint-disable */
/**
* @file bilinear_interp main function
* @author chenhaoze
*/
export default `
// entry (start) function
void main(void) {
// output data
ivec4 oPos = getOutputTensorPos();
// convert output coordinates to input coordinates
//int sumVal = oPos.g + oPos.a * channel_out + oPos.b * channel_out * width_shape_out + oPos.r * channel_out * width_shape_out * height_shape_out;
//oPos = transferFromNHWCtoNCHW(sumVal, channel_out, width_shape_out, height_shape_out, total_shape_out);
float o = getValueFromTensorPos_origin(oPos.r, oPos.g, oPos.b, oPos.a);
float scale_x = float(width_shape_out - 1) / float(width_shape_origin - 1);
float scale_y = float(height_shape_out - 1) / float(height_shape_origin - 1);
float x = float(oPos.a) / scale_x;
float y = float(oPos.b) / scale_y;
int x1 = int(floor(x));
int y1 = int(floor(y));
int x2 = int(ceil(x));
int y2 = int(ceil(y));
float dist_x = x - float(x1);
float dist_y = y - float(y1);
float value11 = getValueFromTensorPos_origin(oPos.r, oPos.g, y1, x1);
float value12 = getValueFromTensorPos_origin(oPos.r, oPos.g, y2, x1);
float value21 = getValueFromTensorPos_origin(oPos.r, oPos.g, y1, x2);
float value22 = getValueFromTensorPos_origin(oPos.r, oPos.g, y2, x2);
float value = (1.0 - dist_x) * (1.0 - dist_y) * value11 +
(1.0 - dist_x) * dist_y * value12 + dist_x * (1.0 - dist_y) * value21 +
dist_x * dist_y * value22;
setOutput(float(value));
}
`;
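For reference, the interpolation the shader performs for a single output location, written as plain JavaScript; getValue stands in for getValueFromTensorPos_origin and the function name bilinearAt is illustrative. The (size - 1) scale factors match the align-corners style scaling used above:
function bilinearAt(getValue, n, c, outY, outX, inH, inW, outH, outW) {
    const scaleX = (outW - 1) / (inW - 1);
    const scaleY = (outH - 1) / (inH - 1);
    const x = outX / scaleX;
    const y = outY / scaleY;
    const x1 = Math.floor(x), x2 = Math.ceil(x);
    const y1 = Math.floor(y), y2 = Math.ceil(y);
    const dx = x - x1, dy = y - y1;
    return (1 - dx) * (1 - dy) * getValue(n, c, y1, x1)
         + (1 - dx) * dy * getValue(n, c, y2, x1)
         + dx * (1 - dy) * getValue(n, c, y1, x2)
         + dx * dy * getValue(n, c, y2, x2);
}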
/* eslint-disable */
/**
* @file bilinear_interp params file
* @author chenhaoze
*/
export default `
// input data
const int width_shape_origin = WIDTH_SHAPE_ORIGIN;
const int height_shape_origin = HEIGHT_SHAPE_ORIGIN;
const int length_shape_origin = LENGTH_SHAPE_ORIGIN;
const int width_texture_origin = WIDTH_TEXTURE_ORIGIN;
const int height_texture_origin = HEIGHT_TEXTURE_ORIGIN;
const int channel_origin = CHANNEL_ORIGIN;
const int total_shape_origin = TOTAL_SHAPE_ORIGIN;
const int total_shape_out = TOTAL_SHAPE_OUT;
// input data
uniform sampler2D texture_origin;
`;
......@@ -11,8 +11,8 @@ void main(void) {
// int sumVal = oPos.g + oPos.a * channel_out + oPos.b * channel_out * width_shape_out + oPos.r * channel_out * width_shape_out * height_shape_out;
// ivec4 new_oPos = transferFromNHWCtoNCHW(sumVal, channel_out, width_shape_out, height_shape_out, total_shape_out);
float o = 0.0;
if (oPos[dim] > inputs_dim[0] - 1) {
oPos[dim] = oPos[dim] - inputs_dim[0];
if (oPos[dim] > inputs_dim - 1) {
oPos[dim] = oPos[dim] - inputs_dim;
o = getValueFromTensorPos_counter(oPos.r, oPos.g, oPos.b, oPos.a);
}
else {
......
......@@ -28,7 +28,7 @@ const int total_shape_origin = TOTAL_SHAPE_ORIGIN;
const int total_shape_out = TOTAL_SHAPE_OUT;
const int dim = DIM;
const int inputs_dim[1] = int[](INPUTS_DIM);
const int inputs_dim = INPUTS_DIM;
// uniform变量
......
......@@ -16,16 +16,8 @@ export default `
int temp_y = 0;
float o = 0.0;
float f = 0.0;
if (x % 2 == 1) x = x - 2;
if (y % 2 == 1) y = y - 2;
// reorder the traversal order
//int sumVal = oPos.g + oPos.a * channel_out + oPos.b * channel_out * width_shape_out;
//int new_a = sumVal % width_shape_out;
//int new_b = int((sumVal - new_a) / width_shape_out) % height_shape_out;
//int new_g = int((((sumVal - new_a) / width_shape_out) - new_b) / height_shape_out);
//int x = new_a;
//int c = new_g;
//int y = new_b;
if (int(mod(float(x), 2.0)) == 1) x = x - 2;
if (int(mod(float(y), 2.0)) == 1) y = y - 2;
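// int(mod(float(x), 2.0)) replaces x % 2: GLSL ES 1.0 (WebGL1) has no integer % operator.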
// get the output coordinates
int oTensorChannel = int(c * groups / channel_out) * channel_origin;
int oy = y;
......@@ -43,8 +35,7 @@ export default `
}
// per-channel computation
for (int j = 0; j < channel_origin; j++) {
if (ox % stride_h == 0 && oy % stride_v == 0) {
if (int(mod(float(ox), float(stride_h))) == 0 && int(mod(float(oy), float(stride_v))) == 0) {
temp_x = int(floor(float(ox) / float(stride_h)));
temp_y = int(floor(float(oy) / float(stride_v)));
if (temp_x < width_shape_origin && temp_y < height_shape_origin){
......
......@@ -7,22 +7,22 @@ export default `
// entry (start) function
void main(void) {
// output data
ivec4 oPos = getOutputTensorPos();
float o = getValueFromTensorPos_origin(oPos.r, oPos.g, oPos.b, oPos.a);
ivec4 oPos = getOutputTensorPosLIMIT_OUT();
float o = getValueFromTensorPosLIMIT_ORIGIN_origin(oPos.r, oPos.g, oPos.b, oPos.a);
ivec4 pos_counter;
float c = 0.0;
if (axis == 1){
c = getValueFromTensorPos_counter(0, oPos.r, oPos.g, oPos.b);
c = getValueFromTensorPosLIMIT_COUNTER_counter(0, oPos.r, oPos.g, oPos.b);
}
else if (axis == 2){
c = getValueFromTensorPos_counter(0, 0, oPos.r, oPos.g);
c = getValueFromTensorPosLIMIT_COUNTER_counter(0, 0, oPos.r, oPos.g);
}
else if (axis == 3){
c = getValueFromTensorPos_counter(0, 0, 0, oPos.r);
c = getValueFromTensorPosLIMIT_COUNTER_counter(0, 0, 0, oPos.r);
}
else {
c = getValueFromTensorPos_counter(oPos.r, oPos.g, oPos.b, oPos.a);
c = getValueFromTensorPosLIMIT_COUNTER_counter(oPos.r, oPos.g, oPos.b, oPos.a);
}
float res = c + o;
setOutput(float(res));
......
......@@ -7,10 +7,10 @@ export default `
void main(void) {
float res = 0.0;
// get the output coordinates
ivec4 out_pos = getOutputTensorPos();
ivec4 out_pos = getOutputTensorPosLIMIT_OUT();
for (int j = 0; j < width_shape_origin; j++) {
float c = getValueFromTensorPos_counter(out_pos[0], out_pos[1], j, out_pos[3]);
float o = getValueFromTensorPos_origin(out_pos[0], out_pos[1], out_pos[2], j);
float c = getValueFromTensorPosLIMIT_COUNTER_counter(out_pos[0], out_pos[1], j, out_pos[3]);
float o = getValueFromTensorPosLIMIT_COUNTER_origin(out_pos[0], out_pos[1], out_pos[2], j);
res += c * o;
}
setOutput(res);
......
......@@ -6,7 +6,7 @@
export default `
// entry (start) function
void main(void) {
int length = int(target_value.length() / num);
int length = int(target_length / num);
ivec4 oPos = getOutputTensorPos();
// convert output coordinates to input coordinates
//int sumVal = oPos.g + oPos.a * channel_out + oPos.b * channel_out * width_shape_out + oPos.r * channel_out * width_shape_out * height_shape_out;
......
......@@ -18,7 +18,7 @@ const int total_shape_out = TOTAL_SHAPE_OUT;
const int dim = DIM;
const int num = NUM;
const int target_value[TARGET_LENGTH] = int[](TARGET_VALUE);
const int target_length = TARGET_LENGTH;
// input data
......
......@@ -19,10 +19,10 @@ void main(void) {
o = getValueFromTensorPos_origin(oPos[0], oPos[1], oPos[2], oPos[3]);
}
else if (perm_size == 2) {
o = getValueFromTensorPos_origin(oPos[0], oPos[1], oPos[min(2 + perm_0, 3)], oPos[min(2 + perm_1, 3)]);
o = getValueFromTensorPos_origin(oPos[0], oPos[1], oPos[(2 + perm_0)>3?3:(2 + perm_0)], oPos[(2 + perm_1)>3?3:(2 + perm_1)]);
}
else if (perm_size == 3) {
o = getValueFromTensorPos_origin(oPos[0], oPos[min(1 + perm_0, 3)], oPos[min(1 + perm_1, 3)], oPos[min(1 + perm_2, 3)]);
o = getValueFromTensorPos_origin(oPos[0], oPos[(1 + perm_0)>3?3:(1 + perm_0)], oPos[(1 + perm_1)>3?3:(1 + perm_1)], oPos[(1 + perm_2)>3?3:(1 + perm_2)]);
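// The ternary replaces min(): GLSL ES 1.0 only defines min() for floats, not ints.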
}
else if (perm_size == 4) {
o = getValueFromTensorPos_origin(oPos[perm_0], oPos[perm_1], oPos[perm_2], oPos[perm_3]);
......
......@@ -125,7 +125,6 @@ const mergeType = 'conv2d-elementwise_add';
export default class OpData {
constructor(name, input = {}, output = {}, attrs = {}) {
// console.dir(this);
this.realName = name;
this.name = name;
this.attrs = attrs;
......@@ -205,7 +204,6 @@ export default class OpData {
// take the first entry by default
const data = this.output[key] || [{}];
if (tensorName[key.toLowerCase()]) {
// console.dir(this);
data.forEach(item => {
item.tensorName = tensorName[key.toLowerCase()];
tensorData.push(item);
......@@ -453,7 +451,7 @@ export default class OpData {
this.attrs.target_length = dim_value.length;
this.attrs.target_value = dim_value;
// store the length of the input tensor along dim
this.attrs.inputs_dim = [origin_shape[axis]];
this.attrs.inputs_dim = origin_shape[axis];
this.attrs.dim = 4 - origin_shape.length + axis;
}
......
......@@ -119,12 +119,13 @@ export default {
let offsetY = 0;
// max texture size on Android/iOS is 4096; restructure the storage as (2bh, cw / 2)
let exceedMax = false;
// FIXME: commented out for now so mobilenet can run, pending a proper fix
// if (height > MAX_TEXTURE_SIZE || width > MAX_TEXTURE_SIZE) {
// height *= 2;
// width = c * (Math.ceil(w / 2));
// exceedMax = true;
// }
// workaround for exceeding TEXTURE_SIZE; a better solution will follow in a later upgrade
if (height > 4096 || width > 4096) {
//console.error('size exceeds limit', shape);
//height *= 4;
//width = c * (Math.ceil(w / 4));
//exceedMax = true;
}
if (isPacked) {
// packed layout
height = b * c * Math.ceil(h / 2);
......@@ -203,7 +204,7 @@ export default {
return fourDimShape;
},
/*
/*
* convert NHWC-layout data to NCHW layout
*/
nhwc2nchw(data, shape) {
......@@ -226,7 +227,7 @@ export default {
return nchwData;
},
/*
/*
* convert NCHW-layout data to NHWC layout
*/
nchw2nhwc(data, shape) {
......@@ -249,9 +250,9 @@ export default {
return nhwcData;
},
/*
/*
* print data at evenly spaced intervals
*/
*/
stridePrint(data, count = 20) {
let realPrintCount = count;
if (data.length <= realPrintCount) {
......@@ -267,10 +268,10 @@ export default {
for (let i = 0; i < realPrintCount; i++) {
numbers.push(i * stride + ": " + data[i * stride]);
}
console.log(numbers)
console.log(numbers);
},
/*
/*
* print data contiguously
*/
continuousPrint(data, count = 100) {
......@@ -282,7 +283,7 @@ export default {
for (let i = 0; i < realPrintCount; i++) {
numbers.push(i + ": " + data[i]);
}
console.log(numbers)
console.log(numbers);
},
softmax(nchwData) {
......@@ -306,6 +307,44 @@ export default {
}
return result;
},
// handles the case where the model's final texture exceeds the size limit and inst.read returns misordered data
formatReadData(nchwData, nchwShape) {
if (nchwShape.length < 4) {
let batch = [];
for (let i = 0; i < (4 - nchwShape.length); i++) {
batch.push(1);
}
nchwShape = batch.concat(nchwShape);
}
const shape_b = nchwShape[0];
const shape_c = nchwShape[1];
const shape_h = nchwShape[2];
const shape_w = nchwShape[3];
const texture_height = shape_b * shape_h;
const texture_width = shape_c * shape_w;
if (texture_height <= 4096 && texture_width <= 4096) {
return nchwData;
}
let pos = 0;
const formatData = [];
const pieceW = Math.ceil(shape_w / 4); // shape_width after the reshape
for (let bIndex = 0; bIndex < shape_b; bIndex++) {
for (let cIndex = 0; cIndex < shape_c; cIndex++) {
for (let hIndex = 0; hIndex < shape_h; hIndex++) {
for (let wIndex = 0; wIndex < shape_w; wIndex++) {
pos = Math.floor(wIndex / pieceW) * pieceW * (shape_h - 1) + wIndex + hIndex * pieceW;
pos += bIndex * shape_c * shape_h * shape_w+ cIndex * shape_h * shape_w;
formatData.push(nchwData[pos]);
}
}
}
}
return formatData;
}
};
/* eslint-enable */
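A hedged usage note: the unit test below already runs the read-back through this helper; a minimal call looks like the following, where the shape is illustrative and chosen so that c*w exceeds 4096 (nchwResult is the NHWC→NCHW converted read-back, as in the test):
// Texture for this output would be (b*h) x (c*w) = 192 x 8192, so the 4-way split
// layout was used on the GPU and formatReadData restores plain NCHW order.
const plainNCHW = Utils.formatReadData(nchwResult, [1, 32, 192, 256]);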
This diff is collapsed.
......@@ -126,7 +126,6 @@
2,
2
]
},
}
]
}
......@@ -22,7 +22,7 @@ const unitPath = {
'split': 'model.test.split.json'
};
// specify the op to run
const modelType = 'split';
const modelType = 'conv2d';
// specify the op to run
const unitData = unitPath[modelType];
......@@ -63,13 +63,13 @@ async function run() {
// get the NHWC -> NCHW output
const outputNCHWShape = getOutputShape();
const outputNHWCShape = nchwShape2nhwcShape(outputNCHWShape);
let nchwResult = Utils.nhwc2nchw(result, outputNHWCShape);
console.log('result');
console.log(result);
let nchwResult = Utils.nhwc2nchw(result, outputNHWCShape);
const formatData = Utils.formatReadData(nchwResult, outputNCHWShape);
console.log('NCHW RESULT');
console.log(nchwResult);
console.log(formatData);
}
run();
......@@ -112,6 +112,7 @@ function nchwShape2nhwcShape(nchw) {
}
batchNCHW = batch.concat(nchw);
}
const N = batchNCHW[0];
const C = batchNCHW[1];
const H = batchNCHW[2];
......