Unverified commit e24f59e9, authored by wangqun, committed by GitHub

Merge pull request #2 from PaddlePaddle/master

[change][D] Compatibility update
@@ -6,20 +6,19 @@ Paddle.js is the web-oriented sub-project of Baidu Paddle and runs in the browser
## Key Features
### Modular
The project is built on the Atom system, a versatile framework that supports GPGPU operations on WebGL. It is highly modular and can speed up computation tasks by leveraging WebGL.
### Browser Coverage
* PC: Chrome, Firefox
* Mac: Chrome, Safari
* Android: Baidu App and QQ Browser
### Supported Operations
Currently Paddle.js only supports a limited set of Paddle ops. If your model uses unsupported ops, Paddle.js will fail to run and report which ops in your model are not yet supported. Please file an issue to let us know which ops you need.
[See the full list](./src/factory/fshader/README.md)
@@ -86,6 +85,7 @@ let result = await inst.read();
## Running the Paddle.js Model Converter
The model converter takes a Paddle-format model as input (for example, a model from Paddle Hub); running it produces a Paddle.js model in JSON format.
[See how to use the converter](./tools/ModelConverter/README_cn.md)
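For orientation, here is a minimal usage sketch of how a converted model might then be loaded and run in the browser. The method names `execute` and `read` are taken from code appearing elsewhere in this commit; the import path, constructor options, and the absence of an explicit load step are assumptions, not the confirmed API:

```js
// Hypothetical usage sketch; option names and paths are illustrative only.
import Paddle from './src/paddle/paddle';

async function demo(feedData) {
    const inst = new Paddle({
        // assumed option: where the converter's model.json + chunk_*.dat live
        modelPath: './dist/model/humanseg/'
    });
    inst.execute(feedData);           // runs the ops on WebGL
    const result = await inst.read(); // reads the output texture back, as shown above
    return result;
}
```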
## Web-Friendly Model Format
@@ -103,6 +103,6 @@ Paddle.js has already converted some models into the format it supports; the list below
## Feedback and Community Support
- Questions, reports, and suggestions are welcome through GitHub Issues!
- Forum: opinions and questions are welcome at our [PaddlePaddle Forum](https://ai.baidu.com/forum/topic/list/168)
- QQ group chat: 696965088
...@@ -151,8 +151,6 @@ export default class imageFeed { ...@@ -151,8 +151,6 @@ export default class imageFeed {
} }
} }
} }
console.log('this is the end of reshapetorgb !!!');
console.dir(result);
return result; return result;
}; };
...@@ -164,7 +162,6 @@ export default class imageFeed { ...@@ -164,7 +162,6 @@ export default class imageFeed {
* @return {Object} 缩放后的尺寸 * @return {Object} 缩放后的尺寸
*/ */
reSize(image, params) { reSize(image, params) {
console.log('execute resize!!');
// 原始图片宽高 // 原始图片宽高
const width = this.pixelWidth; const width = this.pixelWidth;
const height = this.pixelHeight; const height = this.pixelHeight;
...@@ -192,7 +189,6 @@ export default class imageFeed { ...@@ -192,7 +189,6 @@ export default class imageFeed {
* 根据scale缩放图像并且缩放成目标尺寸并居中 * 根据scale缩放图像并且缩放成目标尺寸并居中
*/ */
resizeAndFitTargetSize(image, params){ resizeAndFitTargetSize(image, params){
console.log('execute resizeAndFitTargetSize!!');
// 原始图片宽高 // 原始图片宽高
const width = this.pixelWidth; const width = this.pixelWidth;
const height = this.pixelHeight; const height = this.pixelHeight;
...@@ -249,7 +245,6 @@ export default class imageFeed { ...@@ -249,7 +245,6 @@ export default class imageFeed {
sh = Math.round(sw * this.pixelHeight / this.pixelWidth); sh = Math.round(sw * this.pixelHeight / this.pixelWidth);
y = Math.floor((targetHeight - sh) / 2); y = Math.floor((targetHeight - sh) / 2);
} }
// console.log(x, y, sw, sh);
if (center) { if (center) {
this.fromPixels2DContext.drawImage( this.fromPixels2DContext.drawImage(
image, x, y, sw, sh); image, x, y, sw, sh);
...@@ -327,24 +322,18 @@ export default class imageFeed { ...@@ -327,24 +322,18 @@ export default class imageFeed {
data = this.resizeAndFitTargetSize(pixels, opt); data = this.resizeAndFitTargetSize(pixels, opt);
data2 = this.fromPixels2DContext2.getImageData(0, 0, this.pixelWidth, this.pixelHeight); data2 = this.fromPixels2DContext2.getImageData(0, 0, this.pixelWidth, this.pixelHeight);
} }
else if (opt.scale) { // 兼容以前的,如果有scale就是短边缩放到scale模式 else if (opt.scale) { // 直接resize到targetShape Humanseg的情况
scaleSize = this.reSize(pixels, opt); scaleSize = this.reSize(pixels, opt);
console.dir(scaleSize);
console.dir(pixels);
data = this.getImageData(opt, 0, 0, scaleSize); data = this.getImageData(opt, 0, 0, scaleSize);
data2 = this.fromPixels2DContext2.getImageData(0, 0, this.pixelWidth, this.pixelHeight); data2 = this.fromPixels2DContext2.getImageData(0, 0, this.pixelWidth, this.pixelHeight);
} }
else if (opt.targetSize) { // 如果有targetSize,就是装在目标宽高里的模式 TinyYolo的情况 else if (opt.targetSize) { // 如果有targetSize,就是装在目标宽高里的模式 TinyYolo的情况
scaleSize = this.fitToTargetSize(pixels, opt); scaleSize = this.fitToTargetSize(pixels, opt);
data = this.getImageData(opt, 0, 0, scaleSize); data = this.getImageData(opt, 0, 0, scaleSize);
data2 = this.fromPixels2DContext2.getImageData(0, 0, this.pixelWidth, this.pixelHeight); data2 = this.fromPixels2DContext2.getImageData(0, 0, this.pixelWidth, this.pixelHeight);
} }
} }
if (opt.gray) { if (opt.gray) {
data = grayscale(data); data = grayscale(data);
} }
...@@ -359,6 +348,7 @@ export default class imageFeed { ...@@ -359,6 +348,7 @@ export default class imageFeed {
else if (opt.targetShape) { else if (opt.targetShape) {
data = this.allReshapeToRGB(data, opt, scaleSize); data = this.allReshapeToRGB(data, opt, scaleSize);
} }
return [{data: data, shape: opt.shape || opt.targetShape, name: 'image', canvas: data2}]; return [{data: data, shape: opt.shape || opt.targetShape, name: 'image', canvas: data2}];
} }
} }
......
...@@ -51,6 +51,10 @@ export default class gpu { ...@@ -51,6 +51,10 @@ export default class gpu {
console.log('float extension is started or not? ' + !!this.textureFloat); console.log('float extension is started or not? ' + !!this.textureFloat);
} }
} }
this.maxTextureSize = gl.getParameter(gl.MAX_TEXTURE_SIZE);
this.maxTextureImageUnits = gl.getParameter(gl.MAX_TEXTURE_IMAGE_UNITS);
// 关闭相关功能 // 关闭相关功能
gl.disable(gl.DEPTH_TEST); gl.disable(gl.DEPTH_TEST);
gl.disable(gl.STENCIL_TEST); gl.disable(gl.STENCIL_TEST);
...@@ -67,14 +71,22 @@ export default class gpu { ...@@ -67,14 +71,22 @@ export default class gpu {
this.waits = 0; this.waits = 0;
console.log('WebGl版本是 ' + this.version); console.log('WebGl版本是 ' + this.version);
console.log('MAX_TEXTURE_SIZE is ' + gl.getParameter(gl.MAX_TEXTURE_SIZE)); console.log('MAX_TEXTURE_SIZE is ' + this.maxTextureSize);
console.log('MAX_TEXTURE_IMAGE_UNITS is ' + gl.getParameter(gl.MAX_TEXTURE_IMAGE_UNITS)); console.log('MAX_TEXTURE_IMAGE_UNITS is ' + this.maxTextureImageUnits);
} }
getWebglVersion() { getWebglVersion() {
return this.version; return this.version;
} }
getWebglMaxTextureSize() {
return this.maxTextureSize;
}
getWebglMaxTextureImageUnits() {
return this.maxTextureImageUnits;
}
initCache() { initCache() {
// 运行次数 // 运行次数
this.times = 0; this.times = 0;
...@@ -145,7 +157,6 @@ export default class gpu { ...@@ -145,7 +157,6 @@ export default class gpu {
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST); gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE); gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE); gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE);
gl.texImage2D(gl.TEXTURE_2D, // Target, matches bind above. gl.texImage2D(gl.TEXTURE_2D, // Target, matches bind above.
0, // Level of detail. 0, // Level of detail.
this.downloadInternalFormat, // Internal format. this.downloadInternalFormat, // Internal format.
...@@ -346,6 +357,7 @@ export default class gpu { ...@@ -346,6 +357,7 @@ export default class gpu {
} else { } else {
// texture = gl.createTexture(); // texture = gl.createTexture();
if (isRendered && (iLayer > 0 || (iLayer === 0 && item.tensor !== 'origin'))) { if (isRendered && (iLayer > 0 || (iLayer === 0 && item.tensor !== 'origin'))) {
const tData = this.cacheTextures['' + iLayer]; const tData = this.cacheTextures['' + iLayer];
texture = tData[item.variable + '_' + item.tensor]; texture = tData[item.variable + '_' + item.tensor];
} else { } else {
...@@ -361,6 +373,7 @@ export default class gpu { ...@@ -361,6 +373,7 @@ export default class gpu {
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST); gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE); gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE); gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE);
if (this.version == 2){
gl.texImage2D(gl.TEXTURE_2D, gl.texImage2D(gl.TEXTURE_2D,
0, 0,
this.internalFormat, this.internalFormat,
...@@ -369,8 +382,27 @@ export default class gpu { ...@@ -369,8 +382,27 @@ export default class gpu {
0, 0,
this.textureFormat, this.textureFormat,
gl.FLOAT, gl.FLOAT,
item.data, item.data);
0); }
else {
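                    // WebGL1 fallback: copy each value into the R channel of an RGBA float
                    // texel (G/B/A zeroed) so it can be uploaded with gl.RGBA / gl.FLOAT.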
let oneSize = item.width_texture * item.height_texture;
let temp = new Float32Array(item.width_texture * item.height_texture * 4);
for (let i = 0; i < item.data.length; i++){
temp[i*4] = (item.data[i]);
temp[i*4+1] = 0;
temp[i*4+2] = 0;
temp[i*4+3] = 0;
}
gl.texImage2D(gl.TEXTURE_2D,
0,
gl.RGBA,
item.width_texture,
item.height_texture,
0,
gl.RGBA,
gl.FLOAT,
temp);
}
} }
} }
...@@ -389,7 +421,7 @@ export default class gpu { ...@@ -389,7 +421,7 @@ export default class gpu {
// 生成帧缓存的texture // 生成帧缓存的texture
makeTexure(type, data, opts = {}) { makeTexure(type, data, opts = {}) {
const gl = this.gl; const gl = this.gl;
let index = this.textureBufferIndex % 2; // int()/mod()/float() are GLSL builtins, not JavaScript; plain modulo keeps this runnable
let texture = this.textureBuffer[index]; let texture = this.textureBuffer[index];
gl.bindTexture(gl.TEXTURE_2D, texture); gl.bindTexture(gl.TEXTURE_2D, texture);
...@@ -429,6 +461,7 @@ export default class gpu { ...@@ -429,6 +461,7 @@ export default class gpu {
} }
createPBO() { createPBO() {
if (this.version == 2){
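            // WebGL2: read the framebuffer into a PIXEL_PACK_BUFFER (PBO); the actual copy to
            // CPU memory happens later via getBufferSubData in downloadFoat32TensorFromBuffer.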
const gl2 = this.gl; const gl2 = this.gl;
const buffer = this.pbo; const buffer = this.pbo;
gl2.bindBuffer(gl2.PIXEL_PACK_BUFFER, buffer); gl2.bindBuffer(gl2.PIXEL_PACK_BUFFER, buffer);
...@@ -437,42 +470,40 @@ export default class gpu { ...@@ -437,42 +470,40 @@ export default class gpu {
gl2.readPixels(0, 0, this.width_texture_out, this.height_texture_out, gl2.RGBA, gl2.FLOAT, 0); gl2.readPixels(0, 0, this.width_texture_out, this.height_texture_out, gl2.RGBA, gl2.FLOAT, 0);
gl2.bindBuffer(gl2.PIXEL_PACK_BUFFER, null); gl2.bindBuffer(gl2.PIXEL_PACK_BUFFER, null);
return buffer; return buffer;
}
else {
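            // WebGL1: PBOs are unavailable, so read the pixels synchronously into a Float32Array.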
let buffer = new Float32Array(this.width_texture_out * this.height_texture_out * 4);
const gl2 = this.gl;
gl2.readPixels(0, 0, this.width_texture_out, this.height_texture_out, gl2.RGBA, gl2.FLOAT, buffer);
return buffer;
}
} }
downloadFoat32TensorFromBuffer(buffer) { downloadFoat32TensorFromBuffer(buffer) {
const gl2 = this.gl; const gl2 = this.gl;
const size = 4 * this.width_texture_out * this.height_texture_out; const size = 4 * this.width_texture_out * this.height_texture_out;
if (this.version == 2){
const pixels = new Float32Array(size); const pixels = new Float32Array(size);
gl2.bindBuffer(gl2.PIXEL_PACK_BUFFER, buffer); gl2.bindBuffer(gl2.PIXEL_PACK_BUFFER, buffer);
gl2.getBufferSubData(gl2.PIXEL_PACK_BUFFER, 0, pixels); gl2.getBufferSubData(gl2.PIXEL_PACK_BUFFER, 0, pixels);
gl2.bindBuffer(gl2.PIXEL_PACK_BUFFER, null); gl2.bindBuffer(gl2.PIXEL_PACK_BUFFER, null);
// log.start('后处理-readloop');
// let result = [];
// let offset = 0;
// for (let h = 0; h < this.height_texture_out; h++) {
// // 纪录第1和2行数据
// let temp1 = [];
// let temp2 = [];
// for (let w = 0; w < this.width_texture_out; w++) {
// temp1.push(pixels[offset]);
// temp1.push(pixels[offset + 1]);
// temp2.push(pixels[offset + 2]);
// temp2.push(pixels[offset + 3]);
// offset += 4;
// }
// result = result.concat(temp1);
// result = result.concat(temp2);
// }
let result = []; let result = [];
for (let i = 0; i < this.width_texture_out * this.height_texture_out; i++) { for (let i = 0; i < this.width_texture_out * this.height_texture_out; i++) {
result.push(pixels[4 * i]); result.push(pixels[4 * i]);
} }
// const result = Array.prototype.slice.call(pixels);
// console.dir(['result', result]);
// log.end('后处理-readloop');
return result; return result;
}
else {
let pixels = buffer;
let result = [];
for (let i = 0; i < this.width_texture_out * this.height_texture_out; i++) {
result.push(pixels[4 * i]);
}
return result;
}
} }
getWebglError(status) { getWebglError(status) {
const gl2 = this.gl; const gl2 = this.gl;
switch (status) { switch (status) {
...@@ -497,7 +528,7 @@ export default class gpu { ...@@ -497,7 +528,7 @@ export default class gpu {
createAndWaitForFence() { createAndWaitForFence() {
const gl2 = this.gl; const gl2 = this.gl;
const isFenceEnabled = (gl2.fenceSync !== null); const isFenceEnabled = (gl2.fenceSync != null);
let isFencePassed = () => true; let isFencePassed = () => true;
if (isFenceEnabled) { if (isFenceEnabled) {
const sync = gl2.fenceSync(gl2.SYNC_GPU_COMMANDS_COMPLETE, 0); const sync = gl2.fenceSync(gl2.SYNC_GPU_COMMANDS_COMPLETE, 0);
...@@ -531,10 +562,8 @@ export default class gpu { ...@@ -531,10 +562,8 @@ export default class gpu {
let pixels = new Float32Array(this.width_texture_out * this.height_texture_out * 4); let pixels = new Float32Array(this.width_texture_out * this.height_texture_out * 4);
// gl.pixelStorei(gl.UNPACK_ALIGNMENT, 1); // gl.pixelStorei(gl.UNPACK_ALIGNMENT, 1);
const tt2 = +Date.now(); const tt2 = +Date.now();
gl.readPixels(0, 0, this.width_texture_out, this.height_texture_out, gl.RGBA, gl.FLOAT, pixels, 0); gl.readPixels(0, 0, this.width_texture_out, this.height_texture_out, gl.RGBA, gl.FLOAT, pixels);
// console.log('本次读取数据时间是' + (+Date.now() - tt2)+ ',' + (tt2 - tt)); // console.log('本次读取数据时间是' + (+Date.now() - tt2)+ ',' + (tt2 - tt));
// log.end('后处理-readinside');
// log.start('后处理-readloop');
let result = []; let result = [];
for (let i = 0; i < this.width_texture_out * this.height_texture_out; i++) { for (let i = 0; i < this.width_texture_out * this.height_texture_out; i++) {
result.push(pixels[4 * i]); result.push(pixels[4 * i]);
......
/* eslint-disable */ /* eslint-disable */
import GraphExecutor from '../executor/executor'; import GraphExecutor from '../executor/executor';
import IO from '../feed/imageFeed';
import Runtime from '../runtime/runtime'; import Runtime from '../runtime/runtime';
import OpData from '../utils/opData'; import OpData from '../utils/opData';
import Factory from '../factory/fshader/factory'; import Factory from '../factory/fshader/factory';
...@@ -92,9 +91,14 @@ export default class Graph { ...@@ -92,9 +91,14 @@ export default class Graph {
return; return;
} }
opindex++; opindex++;
// console.log(opindex);
//if (executor.opData) console.log(executor.opData.iLayer);
executor.execute(this.inst, this.isExecuted); executor.execute(this.inst, this.isExecuted);
if (false && executor.opData && opindex >= 184){
console.log('return!');
console.dir(executor);
console.dir(executor.type);
console.dir(this);
return;
}
if (executor.next) { if (executor.next) {
const id = executor.next; const id = executor.next;
const next = this.getTensor(id); const next = this.getTensor(id);
...@@ -199,105 +203,63 @@ export default class Graph { ...@@ -199,105 +203,63 @@ export default class Graph {
}); });
} }
execute_try(temp, ops, idtoindex, executed, inline, prev){
console.log('execute_try!first look at this op');
console.log(ops[temp]);
let canrun = this.checkifcanrun(temp, ops, idtoindex, executed);
if (canrun === false) {
// console.log('canrun === false!');
var a = inline.pop();
this.execute_try(idtoindex[a.id], ops, idtoindex, executed, inline, prev);
return;
}
if (prev >=0) {
ops[prev].next = ops[temp].id;
}
ops[temp].outputsName.forEach(function(item, index) {
executed[item] = true;
})
let next = this.getNextByOp(ops, ops[temp]);
// console.log('this is its next:');
// console.dir(next);
while (next.length === 1) {
let flag = true;
for (let i = 0; i < next[0].inputsName.length; i++){
if (executed[next[0].inputsName[i]] === false) flag = false;
}
if (flag === false) {
// console.log('can not execute next now! jump to another op:');
if (inline.length === 0) return;
prev = temp;
let a = inline.pop();
// console.dir(a);
ops[temp].next = a.id;
temp = idtoindex[a.id];
this.execute_try(temp, ops, idtoindex, executed, inline, prev);
return;
}
else {
// console.log('now execute next op! it is');
ops[temp].next = next[0].id;
temp = idtoindex[next[0].id];
// console.dir(ops[temp]);
next = this.getNextByOp(ops, ops[temp]);
// console.log('its next is: ');
ops[temp].outputsName.forEach(function(item, index) {
executed[item] = true;
})
// console.dir(next);
}
}
if (next.length > 1){
// console.log('next.length > 1!!!');
for (let i = next.length - 1; i >=0 ; i--){
inline.push(next[i]);
}
var a = inline.pop();
this.execute_try(idtoindex[a.id], ops, idtoindex, executed, inline, temp);
}
return;
}
arrangeMap(ops) { arrangeMap(ops) {
// console.log('arrangeMap!');
// console.dir(ops);
var idtoindex = {};
var executed = {}; var executed = {};
var inline = []; var inIndex = [];
var idtoindex = {};
let temp = 0; let temp = 0;
// console.log('graph ops:');
// console.dir(ops);
let ops1 = ops; let ops1 = ops;
ops1.forEach(function(item, index) { ops1.forEach(function(item, index) {
idtoindex[item.id] = index;
// console.dir(item);
item.outputsName.forEach(function(i, idx){ item.outputsName.forEach(function(i, idx){
executed[i] = false; executed[i] = true;
}) })
}); });
//ops[0].inputsName[0] = {name : "feed"}; ops1.forEach(function(item, index) {
// ops[0].outputsName[0] = {name : "image"}; inIndex[index] = 0;
this.execute_try(temp, ops, idtoindex, executed, inline, -1); idtoindex[item.id] = index;
if (item.inputsName.length > 1) {
item.inputsName.forEach(function(i,idx){
if (executed[i] == true) inIndex[index]++;
})
}
else inIndex[index] = item.inputsName.length;
});
this.topoSort(ops, inIndex, idtoindex);
return ops; return ops;
} }
checkifcanrun(temp, ops, executed){ topoSort(ops, inIndex, idtoindex){
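        // Kahn-style topological sort: keep a stack of ops whose inputs are all available,
        // pop one at a time, chain it to the previous op via `next`, then decrement the
        // in-degree of every op consuming one of its outputs and push newly ready ops.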
if (!ops[temp].inputsName) return true; var inline = [];
for (let i = 0; i < ops[temp].inputsName.length; i++){ inline.push(ops[0]);
if (executed[ops[temp].inputsName[i]] === false) return false; let ops_temp = ops.slice(0);
let prev = null;
let a = ops[0];
while(inline.length > 0){
if (prev != null) ops[idtoindex[prev.id]].next = a.id;
prev = a;
a = inline.pop();
for (let i = 0; i < a.outputsName.length; i++){
for (let k = 0; k < ops_temp.length; k++){
for (let j = 0; j < ops_temp[k].inputsName.length; j++){
if (ops_temp[k].inputsName[j] == a.outputsName[i]) {
inIndex[idtoindex[ops_temp[k].id]]--;
if (inIndex[idtoindex[ops_temp[k].id]] == 0){
inline.push(ops[idtoindex[ops_temp[k].id]]);
ops_temp.splice(k,1);
k--;
break;
}
}
}
}
}
} }
return true;
} }
/** /**
* Get Ops Nets Start Node * Get Ops Nets Start Node
* @param ops * @param ops
...@@ -348,8 +310,6 @@ export default class Graph { ...@@ -348,8 +310,6 @@ export default class Graph {
* @returns {*} * @returns {*}
*/ */
createOpsMap(ops) { createOpsMap(ops) {
// console.log('ops!!');
// console.dir(ops);
return ops.map((item, idx) => { return ops.map((item, idx) => {
item.idx = idx; item.idx = idx;
const graphExecutor = new GraphExecutor(item); const graphExecutor = new GraphExecutor(item);
...@@ -372,17 +332,6 @@ export default class Graph { ...@@ -372,17 +332,6 @@ export default class Graph {
}); });
} }
getNextByOp(ops, op) {
return ops.filter((item, key) => {
for (let i = 0; i < item.inputsName.length; i++) {
for(let j = 0; j < op.outputsName.length; j++) {
if (item.inputsName[i] === op.outputsName[j]) {
return true;
}
}
}
});
}
/** /**
* dispose * dispose
*/ */
......
...@@ -109,8 +109,9 @@ export default class Loader { ...@@ -109,8 +109,9 @@ export default class Loader {
const TMP_REGEX = /\-/; const TMP_REGEX = /\-/;
let requesterArr = arr.map(item => { let requesterArr = arr.map(item => {
if (item.name if (item.name
&& item.name.match(TMP_SCHEME_REGEX) === null // && item.name.match(TMP_SCHEME_REGEX) === null
&& item.name.match(TMP_REGEX) === null) { // && item.name.match(TMP_REGEX) === null
) {
return this.fetchData(item.name).then(data => item.data = data); return this.fetchData(item.name).then(data => item.data = data);
} }
return Promise.resolve(); return Promise.resolve();
...@@ -124,9 +125,9 @@ export default class Loader { ...@@ -124,9 +125,9 @@ export default class Loader {
let marker = 0; // 读到哪个位置了 let marker = 0; // 读到哪个位置了
let len; // 当前op长度 let len; // 当前op长度
arr.filter(item => { arr.filter(item => {
return item.name return item.name;
&& item.name.match(TMP_SCHEME_REGEX) === null // && item.name.match(TMP_SCHEME_REGEX) === null
&& item.name.match(TMP_REGEX) === null; // && item.name.match(TMP_REGEX) === null;
}) })
// .sort((a, b) => { // .sort((a, b) => {
// if (a.name > b.name) { // if (a.name > b.name) {
......
...@@ -48,13 +48,9 @@ export default class Paddle { ...@@ -48,13 +48,9 @@ export default class Paddle {
that.graph = graph; that.graph = graph;
that.graph.data = artifacts.data; that.graph.data = artifacts.data;
that.graph.formatWeight(that.graph.data.vars); that.graph.formatWeight(that.graph.data.vars);
const opsMap = that.graph.createOpsMap(that.graph.data.ops, that.graph.data.vars); const opsMap = that.graph.createOpsMap(that.graph.data.ops);
const opsMap1 = that.graph.constructOpsMap(opsMap); const opsMap1 = that.graph.constructOpsMap(opsMap);
// console.log('opsMap1!');
// console.dir(opsMap1);
const opsMap2 = that.graph.arrangeMap(opsMap1); const opsMap2 = that.graph.arrangeMap(opsMap1);
// console.log('opsMap2!');
// console.dir(opsMap2);
that.graph.weightMap = opsMap2; that.graph.weightMap = opsMap2;
} }
/** /**
...@@ -68,10 +64,10 @@ export default class Paddle { ...@@ -68,10 +64,10 @@ export default class Paddle {
this.feed = this.graph.feed = inputs; this.feed = this.graph.feed = inputs;
// 生成op数据 // 生成op数据
if (!this.graph.isExecuted) { if (!this.graph.isExecuted) {
this.graph.weightMap.forEach(op => { this.graph.weightMap.forEach((op, index) => {
const type = op.type; const type = op.type;
if (type !== 'feed' && type !== 'fetch') { if (type !== 'feed' && type !== 'fetch') {
console.log(op.type);
that.graph.buildOpData(op); that.graph.buildOpData(op);
} }
}); });
...@@ -81,7 +77,6 @@ export default class Paddle { ...@@ -81,7 +77,6 @@ export default class Paddle {
} }
updateFeed() { updateFeed() {
this.graph.feedItem.data = this.graph.feed.input[0].data; this.graph.feedItem.data = this.graph.feed.input[0].data;
// Utils.img2texture(this.graph.feedItem);
} }
/** /**
* dispose * dispose
......
...@@ -2,9 +2,6 @@ ...@@ -2,9 +2,6 @@
import Gpu from '../gpu/gpu'; import Gpu from '../gpu/gpu';
import getMaxUniforms from '../test/getMaxUniforms'; import getMaxUniforms from '../test/getMaxUniforms';
import Factory from '../factory/fshader/factory'; import Factory from '../factory/fshader/factory';
// import {getTextureShapeInfo} from '../utils/opData';
// 生成factory实例
// const factory = new Factory({});
/** /**
* @file gpu运行时 * @file gpu运行时
* @author wangqun@baidu.com, yangmingming@baidu.com * @author wangqun@baidu.com, yangmingming@baidu.com
...@@ -29,6 +26,14 @@ export default { ...@@ -29,6 +26,14 @@ export default {
return this.gpu.getWebglVersion(); return this.gpu.getWebglVersion();
}, },
getWebglMaxTextureSize() {
return this.gpu.getWebglMaxTextureSize();
},
getWebglMaxTextureImageUnits() {
return this.gpu.getWebglMaxTextureImageUnits();
},
run(opName, opData, isRendered) { run(opName, opData, isRendered) {
// console.dir(['fscode', opData.fsCode]); // console.dir(['fscode', opData.fsCode]);
// let time = +Date.now(); // let time = +Date.now();
...@@ -64,6 +69,7 @@ export default { ...@@ -64,6 +69,7 @@ export default {
this.gpu.render(opData.renderData, opData.iLayer, isRendered); this.gpu.render(opData.renderData, opData.iLayer, isRendered);
// } // }
}); });
}, },
/** /**
......
...@@ -7,29 +7,38 @@ ...@@ -7,29 +7,38 @@
export default ` export default `
// 根据tensor坐标获取这个tensor位置的值 // 根据tensor坐标获取这个tensor位置的值
float getValueFromTensorPos_TENSOR_NAME(int r, int g, int b, int a) { float getValueFromTensorPos_TENSOR_NAME(int r, int g, int b, int a) {
vec4 pixels = TEXTURE2D(texture_TENSOR_NAME, vec4 pixels = TEXTURE2D(texture_TENSOR_NAME,
vec2( vec2(
(float(a * channel_TENSOR_NAME + g) + 0.5) / float(width_texture_TENSOR_NAME), (float(a * channel_TENSOR_NAME + g) + 0.5) / float(width_texture_TENSOR_NAME),
(float(r * height_shape_TENSOR_NAME + b) + 0.5) / float(height_texture_TENSOR_NAME) (float(r * height_shape_TENSOR_NAME + b) + 0.5) / float(height_texture_TENSOR_NAME)
) )
); );
// 只用了r通道 // 只用了r通道
return pixels.r; return pixels.r;
} }
// 紧凑型布局根据tensor坐标获取这个tensor位置的值
// 超限布局根据tensor坐标获取这个tensor位置的值
float getValueFromTensorPosLimit_TENSOR_NAME(int r, int g, int b, int a) { float getValueFromTensorPosLimit_TENSOR_NAME(int r, int g, int b, int a) {
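    // Over-limit layout: the tensor's width is split into four vertical pieces, so map the
    // width index a into its piece (x) and add that piece's row offset (offsetY) when sampling.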
float halfW = ceil(float(width_shape_TENSOR_NAME) / 2.0); float pieceW = ceil(float(width_shape_TENSOR_NAME) / 4.0);
int x = int(mod(float(a), halfW)); int x = int(mod(float(a), pieceW));
int offsetY = 0; int offsetY = 0;
if (a > x) {
if ((float(a) / pieceW) >= 3.0) {
offsetY = 3 * height_shape_TENSOR_NAME;
}
else if (float(a) / pieceW >= 2.0) {
offsetY = 2 * height_shape_TENSOR_NAME;
}
else if (float(a) >= pieceW) {
offsetY = height_shape_TENSOR_NAME; offsetY = height_shape_TENSOR_NAME;
} }
vec4 pixels = TEXTURE2D(texture_TENSOR_NAME, vec4 pixels = TEXTURE2D(texture_TENSOR_NAME,
vec2( vec2(
(float(x * channel_TENSOR_NAME + g) + 0.5) / float(width_texture_TENSOR_NAME), (float(x * channel_TENSOR_NAME + g) + 0.5) / float(width_texture_TENSOR_NAME),
(float(r * 2 * height_shape_TENSOR_NAME + b + offsetY) + 0.5) / float(height_texture_TENSOR_NAME) (float(r * 4 * height_shape_TENSOR_NAME + b + offsetY) + 0.5) / float(height_texture_TENSOR_NAME)
) )
); );
return pixels.r; return pixels.r;
} }
`; `;
...@@ -8,11 +8,13 @@ export default ` ...@@ -8,11 +8,13 @@ export default `
precision highp float; precision highp float;
precision highp int; precision highp int;
#else #else
precision mediump float; precision highp float;
precision mediump int; precision highp int;
#endif #endif
varying vec2 vCoord;
varying vec4 outColor;
void setOutput(float result) { void setOutput(float result) {
gl_FragColor.r = result; gl_FragColor.r = result;
} }
`; `;
...@@ -15,17 +15,18 @@ ivec4 getOutputTensorPos() { ...@@ -15,17 +15,18 @@ ivec4 getOutputTensorPos() {
return ivec4(b, c, y, x); return ivec4(b, c, y, x);
} }
ivec4 getOutputTensorPosLimit() { ivec4 getOutputTensorPosLimit() {
// 获取原始长度 // 获取原始长度
vec2 outCoord = vCoord.xy * _2d_shape_texture_out; vec2 outCoord = vCoord.xy * _2d_shape_texture_out;
float offsetY = floor(outCoord.y / float(height_shape_out)); float offsetY = floor(outCoord.y / float(height_shape_out));
int x = int(outCoord.x / float(channel_out)); int x = int(outCoord.x / float(channel_out));
if (mod(offsetY, 2.0) > 0.0) { if (mod(offsetY, 4.0) > 0.0) {
x += int(ceil(float(width_shape_out) / 2.0)); x += int(mod(offsetY, 4.0)) * int(ceil(float(width_shape_out) / 4.0));
} }
int y = int(mod(outCoord.y, float(height_shape_out))); int y = int(mod(outCoord.y, float(height_shape_out)));
int c = int(mod(outCoord.x, float(channel_out))); int c = int(mod(outCoord.x, float(channel_out)));
int b = int(outCoord.y / float(2 * height_shape_out)); int b = int(outCoord.y / float(4 * height_shape_out));
return ivec4(b, c, y, x); return ivec4(b, c, y, x);
} }
......
...@@ -10,13 +10,13 @@ export default ` ...@@ -10,13 +10,13 @@ export default `
ivec4 transferFromNHWCtoNCHW( int sumVal, const int channel, const int width_shape, const int height_shape, const int total_shape) { ivec4 transferFromNHWCtoNCHW( int sumVal, const int channel, const int width_shape, const int height_shape, const int total_shape) {
int n_origin = int(total_shape/(channel * width_shape * height_shape)); int n_origin = int(total_shape/(channel * width_shape * height_shape));
int new_a = sumVal % width_shape; int new_a = int(mod(float(sumVal), float(width_shape)));
sumVal = int((sumVal - new_a) / width_shape); sumVal = int((sumVal - new_a) / width_shape);
int new_b = sumVal % height_shape; int new_b = int(mod(float(sumVal), float(height_shape)));
sumVal = int((sumVal - new_b) / height_shape); sumVal = int((sumVal - new_b) / height_shape);
int new_g = sumVal % channel; int new_g = int(mod(float(sumVal), float(channel)));
sumVal = int((sumVal - new_g) / channel); sumVal = int((sumVal - new_g) / channel);
int new_r = sumVal % n_origin; int new_r = int(mod(float(sumVal), float(n_origin)));
return ivec4(new_r,new_g,new_b,new_a); return ivec4(new_r,new_g,new_b,new_a);
} }
`; `;
...@@ -7,14 +7,14 @@ export default ` ...@@ -7,14 +7,14 @@ export default `
// start函数 // start函数
void main(void) { void main(void) {
// 输出数据 // 输出数据
ivec4 oPos = getOutputTensorPos(); ivec4 oPos = getOutputTensorPosLIMIT_OUT();
float o = getValueFromTensorPos_origin(oPos.r, oPos.g, oPos.b, oPos.a); float o = getValueFromTensorPosLIMIT_ORIGIN_origin(oPos.r, oPos.g, oPos.b, oPos.a);
// 归一化数据 // 归一化数据
vec4 scale = getPixelsFromTexturePos_texture_scale(vec2( float(oPos.g) / float(width_texture_scale), 0.0)); vec4 scale = getPixelsFromTexturePos_texture_scale(vec2((float(oPos.g) + 0.5) / float(width_texture_scale), 0.0));
vec4 bias = getPixelsFromTexturePos_texture_bias(vec2((float(oPos.g)) / float(width_texture_bias), 0.0)); vec4 bias = getPixelsFromTexturePos_texture_bias(vec2((float(oPos.g) + 0.5) / float(width_texture_bias), 0.0));
vec4 mean = getPixelsFromTexturePos_texture_mean(vec2((float(oPos.g)) / float(width_texture_mean), 0.0)); vec4 mean = getPixelsFromTexturePos_texture_mean(vec2((float(oPos.g) + 0.5) / float(width_texture_mean), 0.0));
vec4 variance = getPixelsFromTexturePos_texture_variance(vec2((float(oPos.g)) / float(width_texture_variance), 0.0)); vec4 variance = getPixelsFromTexturePos_texture_variance(vec2((float(oPos.g) + 0.5) / float(width_texture_variance), 0.0));
float x = (o - mean[0]) / sqrt(variance[0] + epsilon); float x = (o - mean[0]) / sqrt(variance[0] + epsilon);
float res = scale[0] * x + bias[0]; float res = scale[0] * x + bias[0];
......
/* eslint-disable */
/**
* @file bilinear_interp的配置文件
* @author chenhaoze
*/
export default {
dep: [
{
func: 'getValueFromTensorPos',
conf: {
TENSOR_NAME: 'origin'
}
},
{
func: 'transferFromNHWCtoNCHW',
conf:{
}
}
],
conf: [
'WIDTH_SHAPE_ORIGIN',
'HEIGHT_SHAPE_ORIGIN',
'LENGTH_SHAPE_ORIGIN',
'WIDTH_TEXTURE_ORIGIN',
'HEIGHT_TEXTURE_ORIGIN',
'CHANNEL_ORIGIN',
'WIDTH_SHAPE_OUT',
'HEIGHT_SHAPE_OUT',
'WIDTH_TEXTURE_OUT',
'HEIGHT_TEXTURE_OUT',
'CHANNEL_OUT',
'OFFSET_Y_OUT',
'MULTI_VALUE',
'BIAS_VALUE',
'ACTIVE_FUNCTION'
],
input: [
{
tensor: 'origin',
variable: 'texture',
setter: 'initTexture',
type: 'texture'
}
]
};
/* eslint-disable */
/**
* @file bilinear_interp主函数
* @author chenhaoze
*/
export default `
// start函数
void main(void) {
// 输出数据
ivec4 oPos = getOutputTensorPos();
// 输出坐标转换为输入坐标
//int sumVal = oPos.g + oPos.a * channel_out + oPos.b * channel_out * width_shape_out + oPos.r * channel_out * width_shape_out * height_shape_out;
//oPos = transferFromNHWCtoNCHW(sumVal, channel_out, width_shape_out, height_shape_out, total_shape_out);
float o = getValueFromTensorPos_origin(oPos.r, oPos.g, oPos.b, oPos.a);
float scale_x = float(width_shape_out - 1) / float(width_shape_origin - 1);
float scale_y = float(height_shape_out - 1) / float(height_shape_origin - 1);
float x = float(oPos.a) / scale_x;
float y = float(oPos.b) / scale_y;
int x1 = int(floor(x));
int y1 = int(floor(y));
int x2 = int(ceil(x));
int y2 = int(ceil(y));
float dist_x = x - float(x1);
float dist_y = y - float(y1);
float value11 = getValueFromTensorPos_origin(oPos.r, oPos.g, y1, x1);
float value12 = getValueFromTensorPos_origin(oPos.r, oPos.g, y2, x1);
float value21 = getValueFromTensorPos_origin(oPos.r, oPos.g, y1, x2);
float value22 = getValueFromTensorPos_origin(oPos.r, oPos.g, y2, x2);
float value = (1.0 - dist_x) * (1.0 - dist_y) * value11 +
(1.0 - dist_x) * dist_y * value12 + dist_x * (1.0 - dist_y) * value21 +
dist_x * dist_y * value22;
setOutput(float(value));
}
`;
/* eslint-disable */
/**
* @file bilinear_interp参数文件
* @author chenhaoze
*/
export default `
// 输入数据
const int width_shape_origin = WIDTH_SHAPE_ORIGIN;
const int height_shape_origin = HEIGHT_SHAPE_ORIGIN;
const int length_shape_origin = LENGTH_SHAPE_ORIGIN;
const int width_texture_origin = WIDTH_TEXTURE_ORIGIN;
const int height_texture_origin = HEIGHT_TEXTURE_ORIGIN;
const int channel_origin = CHANNEL_ORIGIN;
const int total_shape_origin = TOTAL_SHAPE_ORIGIN;
const int total_shape_out = TOTAL_SHAPE_OUT;
// 输入数据
uniform sampler2D texture_origin;
`;
...@@ -11,8 +11,8 @@ void main(void) { ...@@ -11,8 +11,8 @@ void main(void) {
// int sumVal = oPos.g + oPos.a * channel_out + oPos.b * channel_out * width_shape_out + oPos.r * channel_out * width_shape_out * height_shape_out; // int sumVal = oPos.g + oPos.a * channel_out + oPos.b * channel_out * width_shape_out + oPos.r * channel_out * width_shape_out * height_shape_out;
// ivec4 new_oPos = transferFromNHWCtoNCHW(sumVal, channel_out, width_shape_out, height_shape_out, total_shape_out); // ivec4 new_oPos = transferFromNHWCtoNCHW(sumVal, channel_out, width_shape_out, height_shape_out, total_shape_out);
float o = 0.0; float o = 0.0;
if (oPos[dim] > inputs_dim[0] - 1) { if (oPos[dim] > inputs_dim - 1) {
oPos[dim] = oPos[dim] - inputs_dim[0]; oPos[dim] = oPos[dim] - inputs_dim;
o = getValueFromTensorPos_counter(oPos.r, oPos.g, oPos.b, oPos.a); o = getValueFromTensorPos_counter(oPos.r, oPos.g, oPos.b, oPos.a);
} }
else { else {
......
...@@ -28,7 +28,7 @@ const int total_shape_origin = TOTAL_SHAPE_ORIGIN; ...@@ -28,7 +28,7 @@ const int total_shape_origin = TOTAL_SHAPE_ORIGIN;
const int total_shape_out = TOTAL_SHAPE_OUT; const int total_shape_out = TOTAL_SHAPE_OUT;
const int dim = DIM; const int dim = DIM;
const int inputs_dim[1] = int[](INPUTS_DIM); const int inputs_dim = INPUTS_DIM;
// uniform变量 // uniform变量
......
...@@ -16,16 +16,8 @@ export default ` ...@@ -16,16 +16,8 @@ export default `
int temp_y = 0; int temp_y = 0;
float o = 0.0; float o = 0.0;
float f = 0.0; float f = 0.0;
if (x % 2 == 1) x = x - 2; if (int(mod(float(x), 2.0)) == 1) x = x - 2;
if (y % 2 == 1) y = y - 2; if (int(mod(float(y), 2.0)) == 1) y = y - 2;
// 重排遍历顺序
//int sumVal = oPos.g + oPos.a * channel_out + oPos.b * channel_out * width_shape_out;
//int new_a = sumVal % width_shape_out;
//int new_b = int((sumVal - new_a) / width_shape_out) % height_shape_out;
//int new_g = int((((sumVal - new_a) / width_shape_out) - new_b) / height_shape_out);
//int x = new_a;
//int c = new_g;
//int y = new_b;
// 获取output的坐标 // 获取output的坐标
int oTensorChannel = int(c * groups / channel_out) * channel_origin; int oTensorChannel = int(c * groups / channel_out) * channel_origin;
int oy = y; int oy = y;
...@@ -43,8 +35,7 @@ export default ` ...@@ -43,8 +35,7 @@ export default `
} }
// channel计算 // channel计算
for (int j = 0; j < channel_origin; j++) { for (int j = 0; j < channel_origin; j++) {
if (int(mod(float(ox), float(stride_h))) == 0 && int(mod(float(oy), float(stride_v))) == 0) {
if (ox % stride_h == 0 && oy % stride_v == 0) {
temp_x = int(floor(float(ox) / float(stride_h))); temp_x = int(floor(float(ox) / float(stride_h)));
temp_y = int(floor(float(oy) / float(stride_v))); temp_y = int(floor(float(oy) / float(stride_v)));
if (temp_x < width_shape_origin && temp_y < height_shape_origin){ if (temp_x < width_shape_origin && temp_y < height_shape_origin){
......
...@@ -7,22 +7,22 @@ export default ` ...@@ -7,22 +7,22 @@ export default `
// start函数 // start函数
void main(void) { void main(void) {
// 输出数据 // 输出数据
ivec4 oPos = getOutputTensorPos(); ivec4 oPos = getOutputTensorPosLIMIT_OUT();
float o = getValueFromTensorPos_origin(oPos.r, oPos.g, oPos.b, oPos.a); float o = getValueFromTensorPosLIMIT_ORIGIN_origin(oPos.r, oPos.g, oPos.b, oPos.a);
ivec4 pos_counter; ivec4 pos_counter;
float c = 0.0; float c = 0.0;
if (axis == 1){ if (axis == 1){
c = getValueFromTensorPos_counter(0, oPos.r, oPos.g, oPos.b); c = getValueFromTensorPosLIMIT_COUNTER_counter(0, oPos.r, oPos.g, oPos.b);
} }
else if (axis == 2){ else if (axis == 2){
c = getValueFromTensorPos_counter(0, 0, oPos.r, oPos.g); c = getValueFromTensorPosLIMIT_COUNTER_counter(0, 0, oPos.r, oPos.g);
} }
else if (axis == 3){ else if (axis == 3){
c = getValueFromTensorPos_counter(0, 0, 0, oPos.r); c = getValueFromTensorPosLIMIT_COUNTER_counter(0, 0, 0, oPos.r);
} }
else { else {
c = getValueFromTensorPos_counter(oPos.r, oPos.g, oPos.b, oPos.a); c = getValueFromTensorPosLIMIT_COUNTER_counter(oPos.r, oPos.g, oPos.b, oPos.a);
} }
float res = c + o; float res = c + o;
setOutput(float(res)); setOutput(float(res));
......
...@@ -7,10 +7,10 @@ export default ` ...@@ -7,10 +7,10 @@ export default `
void main(void) { void main(void) {
float res = 0.0; float res = 0.0;
// 获取output的坐标 // 获取output的坐标
ivec4 out_pos = getOutputTensorPos(); ivec4 out_pos = getOutputTensorPosLIMIT_OUT();
for (int j = 0; j < width_shape_origin; j++) { for (int j = 0; j < width_shape_origin; j++) {
float c = getValueFromTensorPos_counter(out_pos[0], out_pos[1], j, out_pos[3]); float c = getValueFromTensorPosLIMIT_COUNTER_counter(out_pos[0], out_pos[1], j, out_pos[3]);
float o = getValueFromTensorPos_origin(out_pos[0], out_pos[1], out_pos[2], j); float o = getValueFromTensorPosLIMIT_COUNTER_origin(out_pos[0], out_pos[1], out_pos[2], j);
res += c * o; res += c * o;
} }
setOutput(res); setOutput(res);
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
export default ` export default `
// start函数 // start函数
void main(void) { void main(void) {
int length = int(target_value.length() / num); int length = int(target_length / num);
ivec4 oPos = getOutputTensorPos(); ivec4 oPos = getOutputTensorPos();
// 输出坐标转换为输入坐标 // 输出坐标转换为输入坐标
//int sumVal = oPos.g + oPos.a * channel_out + oPos.b * channel_out * width_shape_out + oPos.r * channel_out * width_shape_out * height_shape_out; //int sumVal = oPos.g + oPos.a * channel_out + oPos.b * channel_out * width_shape_out + oPos.r * channel_out * width_shape_out * height_shape_out;
......
...@@ -18,7 +18,7 @@ const int total_shape_out = TOTAL_SHAPE_OUT; ...@@ -18,7 +18,7 @@ const int total_shape_out = TOTAL_SHAPE_OUT;
const int dim = DIM; const int dim = DIM;
const int num = NUM; const int num = NUM;
const int target_value[TARGET_LENGTH] = int[](TARGET_VALUE); const int target_length = TARGET_LENGTH;
// 输入数据 // 输入数据
......
...@@ -19,10 +19,10 @@ void main(void) { ...@@ -19,10 +19,10 @@ void main(void) {
o = getValueFromTensorPos_origin(oPos[0], oPos[1], oPos[2], oPos[3]); o = getValueFromTensorPos_origin(oPos[0], oPos[1], oPos[2], oPos[3]);
} }
else if (perm_size == 2) { else if (perm_size == 2) {
o = getValueFromTensorPos_origin(oPos[0], oPos[1], oPos[min(2 + perm_0, 3)], oPos[min(2 + perm_1, 3)]); o = getValueFromTensorPos_origin(oPos[0], oPos[1], oPos[(2 + perm_0)>3?3:(2 + perm_0)], oPos[(2 + perm_1)>3?3:(2 + perm_1)]);
} }
else if (perm_size == 3) { else if (perm_size == 3) {
o = getValueFromTensorPos_origin(oPos[0], oPos[min(1 + perm_0, 3)], oPos[min(1 + perm_1, 3)], oPos[min(1 + perm_2, 3)]); o = getValueFromTensorPos_origin(oPos[0], oPos[(1 + perm_0)>3?3:(1 + perm_0)], oPos[(1 + perm_1)>3?3:(1 + perm_1)], oPos[(1 + perm_2)>3?3:(1 + perm_2)]);
} }
else if (perm_size == 4) { else if (perm_size == 4) {
o = getValueFromTensorPos_origin(oPos[perm_0], oPos[perm_1], oPos[perm_2], oPos[perm_3]); o = getValueFromTensorPos_origin(oPos[perm_0], oPos[perm_1], oPos[perm_2], oPos[perm_3]);
......
...@@ -125,7 +125,6 @@ const mergeType = 'conv2d-elementwise_add'; ...@@ -125,7 +125,6 @@ const mergeType = 'conv2d-elementwise_add';
export default class OpData { export default class OpData {
constructor(name, input = {}, output = {}, attrs = {}) { constructor(name, input = {}, output = {}, attrs = {}) {
// console.dir(this);
this.realName = name; this.realName = name;
this.name = name; this.name = name;
this.attrs = attrs; this.attrs = attrs;
...@@ -205,7 +204,6 @@ export default class OpData { ...@@ -205,7 +204,6 @@ export default class OpData {
// 默认取第一个数据 // 默认取第一个数据
const data = this.output[key] || [{}]; const data = this.output[key] || [{}];
if (tensorName[key.toLowerCase()]) { if (tensorName[key.toLowerCase()]) {
// console.dir(this);
data.forEach(item => { data.forEach(item => {
item.tensorName = tensorName[key.toLowerCase()]; item.tensorName = tensorName[key.toLowerCase()];
tensorData.push(item); tensorData.push(item);
...@@ -453,7 +451,7 @@ export default class OpData { ...@@ -453,7 +451,7 @@ export default class OpData {
this.attrs.target_length = dim_value.length; this.attrs.target_length = dim_value.length;
this.attrs.target_value = dim_value; this.attrs.target_value = dim_value;
// 保存 输入 tensor 对应dim 的长度 // 保存 输入 tensor 对应dim 的长度
this.attrs.inputs_dim = [origin_shape[axis]]; this.attrs.inputs_dim = origin_shape[axis];
this.attrs.dim = 4 - origin_shape.length + axis; this.attrs.dim = 4 - origin_shape.length + axis;
} }
......
...@@ -119,12 +119,13 @@ export default { ...@@ -119,12 +119,13 @@ export default {
let offsetY = 0; let offsetY = 0;
// 安卓和ios的max texture size是4096, 改造存储空间(2bh, cw / 2) // 安卓和ios的max texture size是4096, 改造存储空间(2bh, cw / 2)
let exceedMax = false; let exceedMax = false;
// FIXME:为了让mobilenet能正常执行,这里先注释掉,待群哥修复 // trick TEXTURE_SIZE 超限问题,后续升级更优解
// if (height > MAX_TEXTURE_SIZE || width > MAX_TEXTURE_SIZE) { if (height > 4096 || width > 4096) {
// height *= 2; //console.error('大小超限', shape);
// width = c * (Math.ceil(w / 2)); //height *= 4;
// exceedMax = true; //width = c * (Math.ceil(w / 4));
// } //exceedMax = true;
}
if (isPacked) { if (isPacked) {
// 紧凑布局 // 紧凑布局
height = b * c * Math.ceil(h / 2); height = b * c * Math.ceil(h / 2);
...@@ -203,7 +204,7 @@ export default { ...@@ -203,7 +204,7 @@ export default {
return fourDimShape; return fourDimShape;
}, },
/* /*
* 将nhwc排布数据转为nchw排布数据 * 将nhwc排布数据转为nchw排布数据
*/ */
nhwc2nchw(data, shape) { nhwc2nchw(data, shape) {
...@@ -226,7 +227,7 @@ export default { ...@@ -226,7 +227,7 @@ export default {
return nchwData; return nchwData;
}, },
/* /*
* 将nchw排布数据转为nhwc排布数据 * 将nchw排布数据转为nhwc排布数据
*/ */
nchw2nhwc(data, shape) { nchw2nhwc(data, shape) {
...@@ -249,9 +250,9 @@ export default { ...@@ -249,9 +250,9 @@ export default {
return nhwcData; return nhwcData;
}, },
/* /*
* 等距间隔打印数据 * 等距间隔打印数据
*/ */
stridePrint(data, count = 20) { stridePrint(data, count = 20) {
let realPrintCount = count; let realPrintCount = count;
if (data.length <= realPrintCount) { if (data.length <= realPrintCount) {
...@@ -267,10 +268,10 @@ export default { ...@@ -267,10 +268,10 @@ export default {
for (let i = 0; i < realPrintCount; i++) { for (let i = 0; i < realPrintCount; i++) {
numbers.push(i * stride + ": " + data[i * stride]); numbers.push(i * stride + ": " + data[i * stride]);
} }
console.log(numbers) console.log(numbers);
}, },
/* /*
* 连续打印数据 * 连续打印数据
*/ */
continuousPrint(data, count = 100) { continuousPrint(data, count = 100) {
...@@ -282,7 +283,7 @@ export default { ...@@ -282,7 +283,7 @@ export default {
for (let i = 0; i < realPrintCount; i++) { for (let i = 0; i < realPrintCount; i++) {
numbers.push(i + ": " + data[i]); numbers.push(i + ": " + data[i]);
} }
console.log(numbers) console.log(numbers);
}, },
softmax(nchwData) { softmax(nchwData) {
...@@ -306,6 +307,44 @@ export default { ...@@ -306,6 +307,44 @@ export default {
} }
return result; return result;
},
// Handles the case where the model's final output texture exceeds the size limit and the data returned by inst.read is out of order
formatReadData(nchwData, nchwShape) {
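    // When either texture side would exceed 4096, the width dimension was folded into four
    // vertical pieces; walk the NCHW indices and fetch each value from its folded position
    // so the returned array is back in plain NCHW order.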
if (nchwShape.length < 4) {
let batch = [];
for (let i = 0; i < (4 - nchwShape.length); i++) {
batch.push(1);
}
nchwShape = batch.concat(nchwShape);
}
const shape_b = nchwShape[0];
const shape_c = nchwShape[1];
const shape_h = nchwShape[2];
const shape_w = nchwShape[3];
const texture_height = shape_b * shape_h;
const texture_width = shape_c * shape_w;
if (texture_height <= 4096 && texture_width <= 4096) {
return nchwData;
}
let pos = 0;
const formatData = [];
const pieceW = Math.ceil(shape_w / 4); // reshape后的 shape_width
for (let bIndex = 0; bIndex < shape_b; bIndex++) {
for (let cIndex = 0; cIndex < shape_c; cIndex++) {
for (let hIndex = 0; hIndex < shape_h; hIndex++) {
for (let wIndex = 0; wIndex < shape_w; wIndex++) {
pos = Math.floor(wIndex / pieceW) * pieceW * (shape_h - 1) + wIndex + hIndex * pieceW;
pos += bIndex * shape_c * shape_h * shape_w+ cIndex * shape_h * shape_w;
formatData.push(nchwData[pos]);
}
}
}
}
return formatData;
} }
}; };
/* eslint-enable */ /* eslint-enable */
(This file's diff has been collapsed.)
...@@ -126,7 +126,6 @@ ...@@ -126,7 +126,6 @@
2, 2,
2 2
] ]
}, }
] ]
} }
...@@ -22,7 +22,7 @@ const unitPath = { ...@@ -22,7 +22,7 @@ const unitPath = {
'split': 'model.test.split.json' 'split': 'model.test.split.json'
}; };
// 制定运行的 op // 制定运行的 op
const modelType = 'split'; const modelType = 'conv2d';
// 制定运行的 op // 制定运行的 op
const unitData = unitPath[modelType]; const unitData = unitPath[modelType];
...@@ -63,13 +63,13 @@ async function run() { ...@@ -63,13 +63,13 @@ async function run() {
// 获取 NHWC -> NCHW 的 输出 // 获取 NHWC -> NCHW 的 输出
const outputNCHWShape = getOutputShape(); const outputNCHWShape = getOutputShape();
const outputNHWCShape = nchwShape2nhwcShape(outputNCHWShape); const outputNHWCShape = nchwShape2nhwcShape(outputNCHWShape);
let nchwResult = Utils.nhwc2nchw(result, outputNHWCShape);
console.log('result'); let nchwResult = Utils.nhwc2nchw(result, outputNHWCShape);
console.log(result); const formatData = Utils.formatReadData(nchwResult, outputNCHWShape);
console.log('NCHW RESULT'); console.log('NCHW RESULT');
console.log(nchwResult); console.log(formatData);
} }
run(); run();
...@@ -112,6 +112,7 @@ function nchwShape2nhwcShape(nchw) { ...@@ -112,6 +112,7 @@ function nchwShape2nhwcShape(nchw) {
} }
batchNCHW = batch.concat(nchw); batchNCHW = batch.concat(nchw);
} }
const N = batchNCHW[0]; const N = batchNCHW[0];
const C = batchNCHW[1]; const C = batchNCHW[1];
const H = batchNCHW[2]; const H = batchNCHW[2];
......
# PaddleJS Model Converter
PaddleJS Model Converter is the model conversion tool for PaddleJS. It converts PaddlePaddle models (also called fluid models) into the browser-friendly PaddleJS format so they can be loaded for inference in PaddleJS and the browser. It also provides model optimization capabilities that help developers optimize the model structure and improve runtime performance.
## 1. Tutorial
### 1.1. Environment Setup
#### Check the Python version
Confirm that the Python environment and version on your machine meet the requirements. If you use Python 3, you may need to replace `python` with `python3` in the commands below.
- Python3: 3.5.1+ / 3.6 / 3.7
- Python2: 2.7.15+
#### Install a virtual environment
*A development machine may have several Python versions installed, and the required packages may exist in different versions. To avoid conflicts, we **strongly recommend** running all converter commands inside a Python virtual environment. If you do not want to use one, or already have one installed, you can skip this step.*
Taking Anaconda as an example:
Go to the [Anaconda](https://www.anaconda.com/) homepage, choose the Anaconda build for your platform and Python version, and install it following the official instructions;
After installation, run the following command on the command line to create a Python virtual environment:
``` bash
conda create --name <your_env_name>
```
Run the following command to switch to the virtual environment:
``` bash
# On Linux or macOS, run
source activate <your_env_name>
# On Windows, run
activate <your_env_name>
```
#### Install dependencies
- If you do `not` need the model optimization capability, run:
``` bash
python -m pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple
```
- If you `do` need the model optimization capability, run:
``` bash
python -m pip install paddlepaddle paddlelite -i https://mirror.baidu.com/pypi/simple
```
### 1.2. Quick Start
- If the fluid model to convert uses a `combined parameter file` (one model file with a single parameter file):
``` bash
python convertToPaddleJSModel.py --modelPath=<fluid_model_file_path> --paramPath=<fluid_param_file_path> --outputDir=<paddlejs_model_directory>
```
- If the fluid model to convert uses `sliced parameter files` (one model file with multiple parameter files):
``` bash
# Note: when invoking the converter this way, the model file inside inputDir must be named '__model__'
python convertToPaddleJSModel.py --inputDir=<fluid_model_directory> --outputDir=<paddlejs_model_directory>
```
The model converter generates the following two kinds of files for PaddleJS:
- model.json (model structure and parameter manifest)
- chunk_\*.dat (the set of binary parameter files)
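For reference, a rough sketch of what a generated model.json looks like, inferred from the `modelInfo` structure in the converter script further down in this commit. The `vars`/`ops` layout and the `name`, `shape`, `persistable`, `type`, `inputs`, and `outputs` fields come from that code; the `attrs` field and all concrete values here are illustrative assumptions:

```js
// Illustrative only; not taken from a real converted model.
const modelJson = {
    vars: [
        {name: 'conv2d_0.w_0', shape: [32, 3, 3, 3], persistable: true},
        {name: 'image', shape: [1, 3, 192, 192], persistable: false}
    ],
    ops: [
        {type: 'feed', inputs: {X: ['feed']}, outputs: {Out: ['image']}, attrs: {}},
        {type: 'conv2d', inputs: {Input: ['image']}, outputs: {Output: ['conv2d_0.tmp_0']}, attrs: {}}
    ]
};
```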
## 2. Reference
Parameter | Description
:-: | :-:
--inputDir | Directory containing the fluid model. Use this only for sliced parameter files; it overrides the `modelPath` and `paramPath` parameters, and the model file must be named `__model__`
--modelPath | Path to the fluid model file; use this for a combined parameter file
--paramPath | Path to the fluid parameter file; use this for a combined parameter file
--outputDir | `Required`, output directory for the PaddleJS model
--optimize | Whether to optimize the model: `0` disables optimization, `1` enables it (requires PaddleLite); optimization is off by default
--logModelInfo | Whether to print the model structure: `0` no, `1` yes; off by default
--sliceDataSize | Size of each slice, in KB, when the PaddleJS parameter files are written in slices; default 4096
## 3. Other Information
If the model you need to convert is in `TensorFlow/Caffe/ONNX` format, you can use the `X2Paddle` tool from the PaddlePaddle project to convert it into a fluid model first, and then use this tool to convert it into a PaddleJS model.
For details, see the [X2Paddle project](https://github.com/PaddlePaddle/X2Paddle)
\ No newline at end of file
...@@ -2,199 +2,110 @@ ...@@ -2,199 +2,110 @@
# -*- coding: UTF-8 -*- # -*- coding: UTF-8 -*-
import json import json
import paddle.fluid as fluid
import paddle
import numpy as np
import collections import collections
import math import math
import sys as sys import sys
import os import os
import struct import struct
import argparse
import shutil
import stat
import traceback
import numpy as np
import paddle.fluid as fluid
#常量控制
#抽样打印数据数量
logDataCount = 50
# 输入模型所在目录 # 输入模型所在目录
modelDir = "humanseg/" modelDir = None
# 输入模型名 # 输入模型名
modelName = "model" modelName = None
# 输入参数名,当且仅当所有模型参数被保存在一个单独的二进制文件中,它才需要被指定,若为分片模型,请设置为None # 输入参数名,当且仅当所有模型参数被保存在一个单独的二进制文件中,它才需要被指定,若为分片模型,请设置为None
paramsName = None paramsName = None
# 模型feed shape # 是否打印模型信息
inputShape = (1, 3, 192, 192) enableLogModelInfo = False
# 输入数据
inputData = np.full(inputShape, 1, "float32")
# 输出模型目录 # 输出模型目录
outputDir = "../dist/model/humanseg/" outputDir = None
# 权重分片扩展名 # 分片文件大小,单位:KB
extensionName = ".dat" sliceDataSize = 4 * 1024
# 输出各var数据 # paddlepaddle运行程序实例
outputVarData = False program = None
# 存放模型结构
# 确认fluid的版本号 modelInfo = {"vars": [], "ops": []}
print(paddle.__version__) # 存放参数数值(未排序)
paramValuesDict = {}
# 采样输出list数据,采样的个数logDataCount为常量
def stridePrint1(data): def logModel(info):
dataCount = len(data) """ 打印信息 """
stride = math.floor(dataCount / logDataCount) if enableLogModelInfo:
if stride == 0: print(info)
stride = 1
nums = [] def sortDict(oldDict, reverse=False):
# outputCount = logDataCount """ 对字典进行排序,返回有序字典,默认升序 """
# if dataCount < logDataCount:
# outputCount = dataCount
# for i in range(outputCount):
# # nums.append(str(i) + ": " + str(data[i]))
# nums.append(data[i])
for i in range(0, logDataCount):
item = data[i * stride]
nums.append(str(i * stride) + ": " + str(item))
print(nums)
def stridePrint(tensor):
length = len(tensor)
# if length < 3000:
# print(tensor)
# return
size = 20
stride = math.floor(length / size)
if stride == 0:
stride = 1
size = math.floor(length / stride)
nums = []
for i in range(0, size):
item = tensor[i * stride]
nums.append(str(i * stride) + ": " + str(item))
print(nums)
# 对字典进行排序,返回有序字典,默认升序
def sortDict(oldDict, reverse = False):
# 获得排序后的key list # 获得排序后的key list
keys = sorted(oldDict.keys(), reverse = reverse) keys = sorted(oldDict.keys(), reverse=reverse)
orderDict = collections.OrderedDict() orderDict = collections.OrderedDict()
# 遍历 key 列表 # 遍历 key 列表
for key in keys: for key in keys:
orderDict[key] = oldDict[key] orderDict[key] = oldDict[key]
return orderDict return orderDict
def dumpModelToJsonFile():
""" 导出模型数据到json文件 """
print("Dumping model structure to json file...")
if not os.path.exists(outputDir):
os.makedirs(outputDir)
outputModelPath = os.path.join(outputDir, "model.json")
with open(outputModelPath, 'w') as outputFile:
json.dump(modelInfo, outputFile, indent=4, separators=(", ", ": "), sort_keys=True)
print("Dumping model structure to json file successfully")
# 将权重数据分片输出到文件,默认分片策略为按4M分片 def sliceDataToBinaryFile(paramValueList):
def sliceDataToBinaryFile(weightValueList, sliceMethod = 0): """ 将参数数据分片输出到文件,默认分片策略为按4M分片 """
# TODO: 分片这里不太对,待修改 totalParamValuesCount = len(paramValueList)
totalWeightCount = len(weightValueList) countPerSlice = int(sliceDataSize * 1024 / 4)
countPerSlice = 0
# sliceCount = 0
if sliceMethod == 0:
# 分片策略 0:按4M分片
countPerSlice = int(4 * 1024 * 1024 / 4)
# sliceCount = math.ceil(totalWeightCount / countPerSlice)
else:
# 分片策略 1:按<=4M等分
# TODO: 待实现
countPerSlice = 0
# sliceCount = 0
if not os.path.exists(outputDir): if not os.path.exists(outputDir):
os.makedirs(outputDir) os.makedirs(outputDir)
currentChunkIndex = 0 currentChunkIndex = 0
currentWeightIndex = 0 currentParamDataIndex = 0
while currentWeightIndex < totalWeightCount - 1: while currentParamDataIndex < totalParamValuesCount - 1:
remainCount = totalWeightCount - currentWeightIndex remainCount = totalParamValuesCount - currentParamDataIndex
if remainCount < countPerSlice: if remainCount < countPerSlice:
countPerSlice = remainCount countPerSlice = remainCount
chunkPath = outputDir + 'chunk_%s' % (currentChunkIndex + 1) + extensionName chunkPath = os.path.join(outputDir, 'chunk_%s.dat' % (currentChunkIndex + 1))
file = open(chunkPath, 'wb') file = open(chunkPath, 'wb')
for i in weightValueList[currentWeightIndex : currentWeightIndex + countPerSlice]: for i in paramValueList[currentParamDataIndex : currentParamDataIndex + countPerSlice]:
byte = struct.pack('f', float(i)) byte = struct.pack('f', float(i))
file.write(byte) file.write(byte)
file.close() file.close()
currentWeightIndex = currentWeightIndex + countPerSlice currentParamDataIndex = currentParamDataIndex + countPerSlice
currentChunkIndex = currentChunkIndex + 1 currentChunkIndex = currentChunkIndex + 1
# for debug print("Output No." + str(currentChunkIndex)+ " binary file, remain " + str(totalParamValuesCount - currentParamDataIndex) + " param values.")
print("第" + str(currentChunkIndex + 1) + "片权重输出完毕,输出个数:" + str(countPerSlice) + " 剩余个数:" + str(totalWeightCount - currentWeightIndex)) print("Slicing data to binary files successfully. (" + str(currentChunkIndex)+ " output files and " + str(currentParamDataIndex) + " param values)")
# for debug def reorderParamsValue():
print("========权重输出完毕,共" + str(currentWeightIndex) + "个数据," + str(currentChunkIndex) + "个分片文件" + "========") """ 对参数文件中的数值,按照variable.name字母序排序,返回排序后组合完成的value list """
paramValuesOrderDict = sortDict(paramValuesDict)
paramValues = []
for value in paramValuesOrderDict.values():
paramValues += value
return paramValues
# 处理fluid的OP type与PaddleJS的OP type不对应情况
def mapToPaddleJSTypeName(fluidOPName): def mapToPaddleJSTypeName(fluidOPName):
""" 处理fluid的OP type与PaddleJS的OP type不对应情况 """
if fluidOPName == "batch_norm": if fluidOPName == "batch_norm":
return "batchnorm" return "batchnorm"
return fluidOPName return fluidOPName
# 将shape扩充为4维 def organizeModelVariableInfo():
def padToFourDimShape(shape): """ 组织参数信息 """
fourDimShape = [] print("Organizing model variables info...")
if len(shape) == 4:
fourDimShape = shape
elif len(shape) < 4:
for i in range(0, 4 - len(shape)):
fourDimShape.append(1)
fourDimShape = fourDimShape + shape
else:
return []
return fourDimShape
# for debug,将NCHW排布的数据转为NHWC排布的数据
def convertNCHW2NHWC(data, shape):
fourDimShape = padToFourDimShape(shape)
N = fourDimShape[0]
C = fourDimShape[1]
H = fourDimShape[2]
W = fourDimShape[3]
print(fourDimShape)
HXW = H * W
CXHXW = C * H * W
index = 0 index = 0
nhwcData = []
for n in range(0, N):
for h in range(0, H):
for w in range(0, W):
for c in range(0, C):
nhwcData.append(data[n * CXHXW + c * HXW + h * W + w])
index = index + 1
return nhwcData
# for debug 输出特定varName对应的数据
def writeTempOutputData(name):
# FIXME:待完善
return
dataList = np.array(fluid.global_scope().find_var(name).get_tensor()).flatten().tolist()
path = '/Users/bluebird/baidu/fluid_tools/check-temp/filter.txt'
if os.path.exists(path):
os.remove()
file = open(path,'a')
for a in range(0, len(dataList)):
file.write(str(dataList[a]))
file.write(",")
file.close()
def convertToPaddleJSModel():
# 1. 初始化fluid运行环境和配置
exe = fluid.Executor(fluid.CPUPlace())
[prog, feed_target_names, fetch_targets] = fluid.io.load_inference_model(dirname=modelDir, executor=exe, model_filename=modelName, params_filename=paramsName)
out = exe.run(prog, feed={feed_target_names[0]: inputData}, fetch_list=fetch_targets, return_numpy=False)
print(out)
index = 0
# 存放模型结构
modelInfo = {"vars": [], "ops": []}
# 存放var信息(未排序) # 存放var信息(未排序)
varInfoDict = {} varInfoDict = {}
# 存放权重数值(未排序) # 获取program中所有的var,遍历并获取所有未排序的var信息和参数数值
weightValueDict = {} vars = list(program.list_vars())
# 2. 获取program中所有的var,遍历并获取所有未排序的var信息和权重数值
vars = list(prog.list_vars())
for v in vars: for v in vars:
# 跳过feed和fetch # 跳过feed和fetch
if "feed" == v.name: if "feed" == v.name:
...@@ -202,10 +113,6 @@ def convertToPaddleJSModel(): ...@@ -202,10 +113,6 @@ def convertToPaddleJSModel():
if "fetch" == v.name: if "fetch" == v.name:
continue continue
print("Var index:" + str(index) + " name:" + v.name)
print(v)
index += 1
varShape = list(v.shape) varShape = list(v.shape)
# FIXME:start paddlejs 不支持shape中为-1,这里需要手动过滤一下,支持了以后可以删除 # FIXME:start paddlejs 不支持shape中为-1,这里需要手动过滤一下,支持了以后可以删除
...@@ -220,40 +127,37 @@ def convertToPaddleJSModel(): ...@@ -220,40 +127,37 @@ def convertToPaddleJSModel():
        # variable info, sorted when dumping to json
        varInfo = {}
        varInfo["shape"] = varShape
        # whether the data is persistable: tensors (weights) are persistable, op outputs are not
        # only persistable data is exported, and paddlejs reads only persistable data
        varInfo["persistable"] = v.persistable
        varInfoDict[v.name] = varInfo
        logModel("[Var index:" + str(index) + " name:" + v.name + "]")
        jsonDumpsIndentStr = json.dumps(varInfo, indent=2)
        logModel(jsonDumpsIndentStr)
        logModel("")
        index += 1
        # persistable data goes into paramValuesDict and is sorted later
        if v.persistable:
            data = np.array(fluid.global_scope().find_var(v.name).get_tensor()).flatten().tolist()
            paramValuesDict[v.name] = data
    # sort the var info dict by key (var name) in alphabetical order
    varInfoOrderDict = sortDict(varInfoDict)
    # append the var info to the vars of model info in sorted order
    for key, value in varInfoOrderDict.items():
        value["name"] = key
        modelInfo["vars"].append(value)
    print("Organizing model variables info successfully.")

def organizeModelOpInfo():
    """ Organize model OP info """
    print("Organizing model operators info...")
    ops = program.current_block().ops
    feedOutputName = None
    index = 0
    for op in ops:
        opInfo = {}
...@@ -267,9 +171,7 @@ def convertToPaddleJSModel():
            if len(value) <= 0:
                continue
            if value[0] == feedOutputName:
                # FIXME: workaround, the PaddleJS feed input must be an image and must be a single input,
                # so the input of the OP that follows feed is rewritten to "image" to keep the chain connected
                inputs[name] = ["image"]
            else:
                inputs[name] = value
...@@ -282,8 +184,7 @@ def convertToPaddleJSModel():
            if len(value) <= 0:
                continue
            if op.type == "feed":
                # FIXME: workaround, the PaddleJS feed input must be an image and must be a single input,
                # so the original output name is kept here for the mapping
                feedOutputName = value[0]
                outputs[name] = ["image"]
            else:
...@@ -302,12 +203,65 @@ def convertToPaddleJSModel():
        # store into modelInfo
        modelInfo["ops"].append(opInfo)
        logModel("[OP index:" + str(index) + " type:" + op.type + "]")
        jsonDumpsIndentStr = json.dumps(opInfo, indent=2)
        logModel(jsonDumpsIndentStr)
        logModel("")
        index += 1
    print("Organizing model operators info successfully.")

def convertToPaddleJSModel():
    """ Convert a fluid model into a paddleJS model """
    # initialize the fluid execution environment and configuration
    exe = fluid.Executor(fluid.CPUPlace())
    result = fluid.io.load_inference_model(dirname=modelDir, executor=exe, model_filename=modelName, params_filename=paramsName)
    global program
    program = result[0]
    # get all ops in the program and append them to model info in op order
    organizeModelOpInfo()
    # get all vars in the program, append them to model info in alphabetical order, and read the param values
    organizeModelVariableInfo()
    # dump the model structure to a json file
    dumpModelToJsonFile()
    # sort the param value dict by key (param name) in alphabetical order and concatenate the values
    paramValues = reorderParamsValue()
    # export the sliced param files
    sliceDataToBinaryFile(paramValues)
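# A typical standalone invocation of this script (paths are illustrative) looks like:
#   python convertModel.py --modelPath=../infer_model/MobileNetV2/__model__ \
#       --paramPath=../infer_model/MobileNetV2/params --outputDir=../jsmodel --sliceDataSize=4096
# In normal use it is driven by convertToPaddleJSModel.py, which assembles this command line.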
if __name__ == "__main__":
try:
p = argparse.ArgumentParser(description='模型转换参数解析')
p.add_argument('--inputDir', help='fluid模型所在目录。当且仅当使用分片参数文件时使用该参数。将过滤modelPath和paramsPath参数,且模型文件名必须为`__model__`', required=False)
p.add_argument('--modelPath', help='fluid模型文件所在路径,使用合并参数文件时使用该参数', required=False)
p.add_argument('--paramPath', help='fluid参数文件所在路径,使用合并参数文件时使用该参数', required=False)
p.add_argument("--outputDir", help='paddleJS模型输出路径,必要参数', required=True)
p.add_argument("--logModelInfo", type=int, default=0, help='是否输出模型结构信息,非必要参数,0为不输出,1为输出,默认不输出', required=False)
p.add_argument("--sliceDataSize", type=int, default=4096, help='分片输出参数文件时,每片文件的大小,单位:KB,非必要参数,默认4096KB', required=False)
args = p.parse_args()
modelDir = args.inputDir
modelPath = args.modelPath
paramPath = args.paramPath
if not modelDir:
modelDir, modelName = os.path.split(modelPath)
paramDir, paramsName = os.path.split(paramPath)
if paramDir != modelDir:
print("\033[31mModel and param file should be put in a same directory!\033[0m")
raise Exception()
outputDir = args.outputDir
sliceDataSize = args.sliceDataSize
if args.logModelInfo == 1:
enableLogModelInfo = True
convertToPaddleJSModel()
except Exception as identifier:
print("\033[31mA fetal error occured. Failed to convert model.\033[0m")
print(traceback.format_exc())
pass
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import sys
import os
import argparse
import shutil
import stat
import traceback

def cleanTempModel(optimizedModelTempDir):
    """ Clean up the temporary model files left by opt optimization """
    if os.path.exists(optimizedModelTempDir):
        print("Cleaning optimized temporary model...")
        shutil.rmtree(optimizedModelTempDir, onerror=grantWritePermission)

def grantWritePermission(func, path, execinfo):
    """ Grant write permission on a path so that it can be removed """
    os.chmod(path, stat.S_IWRITE)
    func(path)
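# Note: shutil.rmtree calls the handler above when a removal fails (typically on read-only files
# on Windows); making the path writable and retrying func(path) lets the cleanup proceed.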

if __name__ == "__main__":
    """
    Example:
    'python convertToPaddleJSModel.py --modelPath=../infer_model/MobileNetV2/__model__ --paramPath=../infer_model/MobileNetV2/params --outputDir=../jsmodel --optimize=1'
    """
    try:
        p = argparse.ArgumentParser(description='Argument parser for converting to a PaddleJS model')
        p.add_argument('--inputDir', help='Directory of the fluid model. Use this argument only with sliced param files; it overrides modelPath and paramPath, and the model file must be named `__model__`', required=False)
        p.add_argument('--modelPath', help='Path of the fluid model file, used with a merged param file', required=False)
        p.add_argument('--paramPath', help='Path of the fluid param file, used with a merged param file', required=False)
        p.add_argument("--outputDir", help='Output directory for the paddleJS model, required', required=True)
        p.add_argument("--optimize", type=int, default=0, help='Whether to optimize the model, optional, 0 = off, 1 = on, default off', required=False)
        p.add_argument("--logModelInfo", type=int, default=0, help='Whether to log the model structure info, optional, 0 = no, 1 = yes, default no', required=False)
        p.add_argument("--sliceDataSize", type=int, default=4096, help='Size of each slice when exporting sliced param files, in KB, optional, default 4096KB', required=False)
        args = p.parse_args()
        # TODO: because PaddleLite and PaddlePaddle have conflicting packages, the converter is split into
        # two python files that this entry script invokes via the command line
        optimizeCmd = " optimizeModel.py"
        convertCmd = " convertModel.py"
        inputDir = args.inputDir
        modelPath = args.modelPath
        paramPath = args.paramPath
        outputDir = args.outputDir
        enableOptimization = args.optimize
        enableLogModelInfo = args.logModelInfo
        sliceDataSize = args.sliceDataSize
        optimizedModelTempDir = None
        if enableOptimization == 1:
            optimizedModelTempDir = os.path.join(outputDir, "optimize")
            if inputDir:
                optimizeCmd = optimizeCmd + " --inputDir=" + inputDir
                convertCmd = convertCmd + " --inputDir=" + optimizedModelTempDir
            else:
                optimizeCmd = optimizeCmd + " --modelPath=" + modelPath + " --paramPath=" + paramPath
                # optimizeModelPath, modelName = os.path.split(modelPath)
                # optimizeParamPath, paramName = os.path.split(paramPath)
                optimizeModelPath = os.path.join(optimizedModelTempDir, "model")
                optimizeParamPath = os.path.join(optimizedModelTempDir, "params")
                convertCmd = convertCmd + " --modelPath=" + optimizeModelPath + " --paramPath=" + optimizeParamPath
            optimizeCmd = optimizeCmd + " --outputDir=" + optimizedModelTempDir
        else:
            if inputDir:
                convertCmd = convertCmd + " --inputDir=" + inputDir
            else:
                convertCmd = convertCmd + " --modelPath=" + modelPath + " --paramPath=" + paramPath
        convertCmd = convertCmd + " --outputDir=" + outputDir + " --sliceDataSize=" + str(sliceDataSize) + " --logModelInfo=" + str(args.logModelInfo)
print("============Convert Model Args=============")
if inputDir:
print("inputDir: " + inputDir)
else:
print("modelPath: " + modelPath)
print("paramPath: " + paramPath)
print("outputDir: " + outputDir)
print("enableOptimizeModel: " + str(enableOptimization))
print("enableLogModelInfo: " + str(enableLogModelInfo))
print("sliceDataSize:" + str(sliceDataSize))
pythonCmd = "python"
print("Starting...")
if enableOptimization:
print("Optimizing model...")
os.system(pythonCmd + optimizeCmd)
print("\033[32m\nOptimizing model successfully.\033[0m")
else:
print("\033[33mYou choosed not to optimize model, consequently, optimizing model is skiped.\033[0m")
print("Converting model...")
os.system(pythonCmd + convertCmd)
print("\033[32mConverting model successfully.\033[0m")
if enableOptimization:
cleanTempModel(optimizedModelTempDir)
print("Temporary files has been deleted successfully.")
print("\033[32m============ALL DONE============\033[0m")
except Exception as identifier:
print("\033[31mA fetal error occured. Failed to convert model.\033[0m")
print(traceback.format_exc())
pass
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import collections
import argparse
import traceback
import paddlelite.lite as lite
def optimizeModel(inputDir, modelPath, paramPath, outputDir):
    """ Run model optimization through the opt python API """
    opt = lite.Opt()
    if inputDir:
        # optimization for a model with sliced param files
        opt.set_model_dir(inputDir)
    else:
        # optimization for a model with a merged param file
        opt.set_model_file(modelPath)
        opt.set_param_file(paramPath)
    opt.set_valid_places("arm")
    opt.set_model_type("protobuf")
    opt.set_optimize_out(outputDir)
    optimize_passes = [
        "lite_conv_elementwise_fuse_pass",
        "lite_conv_bn_fuse_pass",
        "lite_conv_elementwise_fuse_pass",
        "lite_conv_activation_fuse_pass",
        "lite_var_conv_2d_activation_fuse_pass",
        "lite_fc_fuse_pass",
        "lite_shuffle_channel_fuse_pass",
        "lite_transpose_softmax_transpose_fuse_pass",
        "lite_interpolate_fuse_pass",
        "identity_scale_eliminate_pass",
        "elementwise_mul_constant_eliminate_pass",
        "lite_sequence_pool_concat_fuse_pass",
        "lite_elementwise_add_activation_fuse_pass",
        "static_kernel_pick_pass",
        "variable_place_inference_pass",
        "argument_type_display_pass",
        "type_target_cast_pass",
        "variable_place_inference_pass",
        "argument_type_display_pass",
        "io_copy_kernel_pick_pass",
        "argument_type_display_pass",
        "variable_place_inference_pass",
        "argument_type_display_pass",
        "type_precision_cast_pass",
        "variable_place_inference_pass",
        "argument_type_display_pass",
        "type_layout_cast_pass",
        "argument_type_display_pass",
        "variable_place_inference_pass",
        "argument_type_display_pass",
        "runtime_context_assign_pass",
        "argument_type_display_pass"
    ]
    opt.set_passes_internal(optimize_passes)
    opt.run()
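
# Example call (paths are illustrative), mirroring what the __main__ block below does for a
# model with a merged param file:
#   optimizeModel(None, "../infer_model/MobileNetV2/__model__", "../infer_model/MobileNetV2/params", "../jsmodel/optimize")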

if __name__ == "__main__":
    try:
        p = argparse.ArgumentParser('Argument parser for model optimization')
        p.add_argument('--inputDir', help='Directory of the fluid model. Use this argument only with sliced param files; it overrides modelPath and paramPath, and the model file must be named `__model__`', required=False)
        p.add_argument('--modelPath', help='Path of the fluid model file, used with a merged param file', required=False)
        p.add_argument('--paramPath', help='Path of the fluid param file, used with a merged param file', required=False)
        p.add_argument("--outputDir", help='Output directory for the optimized fluid model, required', required=True)
        args = p.parse_args()
        inputDir = args.inputDir
        modelPath = args.modelPath
        paramPath = args.paramPath
        outputDir = args.outputDir
        optimizeModel(inputDir, modelPath, paramPath, outputDir)
    except Exception as identifier:
        print("\033[31mA fatal error occurred. Failed to optimize model.\033[0m")
        print(traceback.format_exc())
        pass