diff --git a/.gitignore b/.gitignore
index 9d85e7a8cf7038d61fade0c1f6437b1b82dd1f82..caf886a2b581b5976ea391aca5d7a56041bdbaa8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,3 +29,5 @@ paddleocr.egg-info/
 /deploy/android_demo/app/PaddleLite/
 /deploy/android_demo/app/.cxx/
 /deploy/android_demo/app/cache/
+test_tipc/web/models/
+test_tipc/web/node_modules/
diff --git a/test_tipc/docs/test_inference_js.md b/test_tipc/docs/test_inference_js.md
new file mode 100644
index 0000000000000000000000000000000000000000..c0b7d653aef8c5e583fa86b7947a3b563d03c32f
--- /dev/null
+++ b/test_tipc/docs/test_inference_js.md
@@ -0,0 +1,50 @@
+# Web 端基础预测功能测试
+
+Web 端主要基于 Jest-Puppeteer 完成 e2e 测试,其中 Puppeteer 操作 Chrome 完成推理流程,Jest 完成测试流程。
+>Puppeteer 是一个 Node 库,它提供了一个高级 API 来通过 DevTools 协议控制 Chromium 或 Chrome
+>Jest 是一个 JavaScript 测试框架,旨在确保任何 JavaScript 代码的正确性。
+#### 环境准备
+
+* 安装 Node(包含 npm ) (https://nodejs.org/zh-cn/download/)
+* 确认是否安装成功,在命令行执行
+```sh
+# 显示所安装的 node 版本号,即表示成功安装
+node -v
+```
+* 确认 npm 是否安装成功
+```sh
+# npm 随着 node 一起安装,一般无需额外安装
+# 显示所安装的 npm 版本号,即表示成功安装
+npm -v
+```
+
+#### 使用
+```sh
+# web 测试环境准备
+bash test_tipc/prepare_js.sh 'js_infer'
+# web 推理测试
+bash test_tipc/test_inference_js.sh
+```
+
+#### 流程设计
+
+###### paddlejs prepare
+ 1. 判断 node, npm 是否安装
+ 2. 下载测试模型,当前检测模型是 ch_PP-OCRv2_det_infer ,识别模型是 ch_PP-OCRv2_rec_infer[1, 3, 32, 320]。如果需要替换模型,可直接将模型文件放在test_tipc/web/models/目录下。
+    - 文本检测模型:https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar
+    - 文本识别模型:https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar
+    - 文本识别模型[1, 3, 32, 320]:https://paddlejs.bj.bcebos.com/models/ch_PP-OCRv2_rec_infer.tar
+    - 保证较为准确的识别效果,需要将文本识别模型导出为输入shape是[1, 3, 32, 320]的静态模型
+ 3. 转换模型, inference.pdmodel inference.pdiparams 转换为 model.json chunk.dat(检测模型保存地址:test_tipc/web/models/ch_PP-OCRv2_det_infer,识别模型保存地址:test_tipc/web/models/ch_PP-OCRv2_rec_infer)
+ 4. 安装最新版本 ocr sdk @paddlejs-models/ocr@latest
+ 5. 安装测试环境依赖 puppeteer、jest、jest-puppeteer,如果检查到已经安装,则不会进行二次安装
+
+ ###### paddlejs infer test
+ 1. Jest 执行 server command:`python3 -m http.server 9811` 开启本地服务
+ 2. 启动 Jest 测试服务,通过 jest-puppeteer 插件完成 chrome 操作,加载 @paddlejs-models/ocr 脚本完成推理流程
+ 3. 测试用例为原图识别后的文本结果与预期文本结果(expect.json)进行对比,测试通过有两个标准:
+    * 原图识别结果逐字符与预期结果对比,误差不超过 **10个字符**;
+    * 原图识别结果每个文本框字符内容与预期结果进行相似度对比,相似度不小于 0.9(全部一致则相似度为1)。
+
+ 只有满足上述两个标准,视为测试通过。通过为如下显示:
+
diff --git a/test_tipc/prepare_js.sh b/test_tipc/prepare_js.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7bcdeb35a40245351031f2125dcb4cb6e96bc503
--- /dev/null
+++ b/test_tipc/prepare_js.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+
+set -o errexit
+set -o nounset
+shopt -s extglob
+
+# Main steps of the paddlejs prepare stage (all paths are relative to the current directory):
+# 1. Check that node and npm are installed.
+# 2. Download the test models: detection is ch_PP-OCRv2_det_infer, recognition is ch_PP-OCRv2_rec_infer [1, 3, 32, 320]. Other models can be placed under test_tipc/web/models/ (note: this script deletes that directory before downloading).
+#    - Text detection model: https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar
+#    - Text recognition model: https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar
+#    - Text recognition model [1, 3, 32, 320]: https://paddlejs.bj.bcebos.com/models/ch_PP-OCRv2_rec_infer.tar
+#    - For reasonably accurate recognition, the recognition model has to be exported as a static model with input shape [1, 3, 32, 320].
+# 3. Convert the models: inference.pdmodel / inference.pdiparams -> model.json / chunk.dat (detection output: test_tipc/web/models/ch_PP-OCRv2_det_infer, recognition output: test_tipc/web/models/ch_PP-OCRv2_rec_infer).
+# 4. Install the latest ocr sdk, @paddlejs-models/ocr@latest.
+# 5. Install the test dependencies puppeteer, jest and jest-puppeteer; they are not installed a second time when already present.
+
+# Abort with a failing status when node is not installed
+if ! command -v node >/dev/null 2>&1; then
+    echo -e "\033[31m node 未安装 \033[0m" >&2
+    exit 1
+fi
+
+# Abort with a failing status when npm is not installed
+if ! command -v npm >/dev/null 2>&1; then
+    echo -e "\033[31m npm 未安装 \033[0m" >&2
+    exit 1
+fi
+
+# MODE must be 'js_infer'; default to empty so a missing argument reaches the message below instead of a nounset error
+MODE=${1:-}
+# js_infer MODE , load model file and convert model to js_infer
+if [ "${MODE}" != "js_infer" ]; then
+    echo "Please change mode to 'js_infer'" >&2
+    exit 1
+fi
+
+
+# saved_model_name
+det_saved_model_name=ch_PP-OCRv2_det_infer
+rec_saved_model_name=ch_PP-OCRv2_rec_infer
+
+# model_path
+model_path=test_tipc/web/models/
+
+rm -rf -- "${model_path:?}"
+
+echo "${model_path}${det_saved_model_name}"
+echo "${model_path}${rec_saved_model_name}"
+
+# download ocr_det inference model; extract with -C so a tar failure trips errexit and the working directory never changes
+wget -nc -P "${model_path}" https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar
+tar xf "${model_path}${det_saved_model_name}.tar" -C "${model_path}"
+
+# download ocr_rec inference model
+wget -nc -P "${model_path}" https://paddlejs.bj.bcebos.com/models/ch_PP-OCRv2_rec_infer.tar
+tar xf "${model_path}${rec_saved_model_name}.tar" -C "${model_path}"
+
+MYDIR=$(pwd)
+echo "${MYDIR}"
+
+pip3 install paddlejsconverter
+
+# convert inference model to web model: model.json、chunk.dat
+paddlejsconverter \
+    --modelPath="${model_path}${det_saved_model_name}/inference.pdmodel" \
+    --paramPath="${model_path}${det_saved_model_name}/inference.pdiparams" \
+    --outputDir="${model_path}${det_saved_model_name}/"
+
+paddlejsconverter \
+    --modelPath="${model_path}${rec_saved_model_name}/inference.pdmodel" \
+    --paramPath="${model_path}${rec_saved_model_name}/inference.pdiparams" \
+    --outputDir="${model_path}${rec_saved_model_name}/"
+
+# always install latest ocr sdk
+cd test_tipc/web
+echo -e "\033[33m Installing the latest ocr sdk... \033[0m"
+npm install @paddlejs-models/ocr@latest
+npm info @paddlejs-models/ocr
+echo -e "\033[32m The latest ocr sdk installed completely.!~ \033[0m"
+
+# install dependencies unless every one of them is already present (checked per package: grepping for "jest" would also match "jest-puppeteer")
+if [ -d node_modules/jest ] && [ -d node_modules/jest-puppeteer ] && [ -d node_modules/puppeteer ]; then
+    echo -e "\033[32m Dependencies have installed \033[0m"
+else
+    echo -e "\033[33m Installing dependencies ... \033[0m"
+    npm install jest jest-puppeteer puppeteer
+    echo -e "\033[32m Dependencies installed completely.!~ \033[0m"
+fi
+
+# del package-lock.json (-f: do not fail under errexit when the file does not exist)
+rm -f package-lock.json
diff --git a/test_tipc/test_inference_js.sh b/test_tipc/test_inference_js.sh
new file mode 100644
index 0000000000000000000000000000000000000000..e96b187526a390391380a9000988d42ecd3aab38
--- /dev/null
+++ b/test_tipc/test_inference_js.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+set -o errexit
+set -o nounset
+
+cd test_tipc/web
+# run ocr test in chrome
+./node_modules/.bin/jest --config ./jest.config.js
diff --git a/test_tipc/web/expect.json b/test_tipc/web/expect.json
new file mode 100644
index 0000000000000000000000000000000000000000..a60c80a7c855729c7e1fd5f21d18449d33d2c7af
--- /dev/null
+++ b/test_tipc/web/expect.json
@@ -0,0 +1,20 @@
+{
+    "text": [
+        "纯臻营养护发素",
+        "产品信息/参数",
+        "(45元/每公斤,100公斤起订)",
+        "每瓶22元,1000瓶起订)",
+        "【品牌】:代加工方式/OEMODM",
+        "【品名】:纯臻营养护发素",
+        "【产品编号】:YM-X-3011",
+        "ODMOEM",
+        "【净含量】:220ml",
+        "【适用人群】:适合所有肤质",
+        "【主要成分】:鲸蜡硬脂醇、燕麦β-葡聚",
+        "糖、椰油酰胺丙基甜菜碱、泛醌",
+        "(成品包材)",
+        "【主要功能】:可紧致头发磷层,从而达到",
+        "即时持久改善头发光泽的效果,给干燥的头",
+        "发足够的滋养"
+    ]
+}
diff --git a/test_tipc/web/index.html b/test_tipc/web/index.html
new file mode 100644
index 0000000000000000000000000000000000000000..39921fbf0fddc00efe516d85e2b80d0467e87d39
--- /dev/null
+++ b/test_tipc/web/index.html
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+    ocr test
+
+
+
+
+
+
\ No newline at end of file
diff --git a/test_tipc/web/index.test.js b/test_tipc/web/index.test.js
new file mode 100644
index 0000000000000000000000000000000000000000..e07aed82e9e21571d5de5416a77aed3394ab9885
--- /dev/null
+++ b/test_tipc/web/index.test.js
@@ -0,0 +1,79 @@
+const expectData = require('./expect.json');
+
+describe('e2e test ocr model', () => {
+
+    beforeAll(async () => {
+        await page.goto(PATH);
+    });
+
+    it('ocr infer and diff test', async () => {
+        page.on('console', msg => console.log('PAGE LOG:', msg.text()));
+
+        const text = await page.evaluate(async () => {
+            const $ocr = document.querySelector('#ocr');
+            const ocr = paddlejs['ocr'];
+            await ocr.init('./models/ch_PP-OCRv2_det_infer', './models/ch_PP-OCRv2_rec_infer');
+            const res = await ocr.recognize($ocr);
+            return res.text;
+        });
+        // Number of characters that differ between the recognized text and the expected text
+        let diffNum = 0;
+        // Sum of the per-box similarities; divided by the number of expected boxes below
+        let similarity = 0;
+        // Maximum number of differing characters allowed
+        const expectedDiffNum = 10;
+        // Minimum average per-box similarity required
+        const expectedSimilarity = 0.9;
+        // Expected text, one entry per text box
+        const expectResult = expectData.text;
+
+        expectResult && expectResult.forEach((item, index) => {
+            // A text box missing from the result is compared as '' instead of throwing on undefined
+            const word = text[index] || '';
+            // Character-by-character comparison over the expected string
+            for(let i = 0; i < item.length; i++) {
+                if (item[i] !== word[i]) {
+                    console.log('expect: ', item[i], ' word: ', word[i]);
+                    diffNum++;
+                }
+            }
+            // Per-box string similarity
+            const s = similar(item, word);
+            similarity += s;
+        });
+
+        similarity = similarity / expectResult.length;
+
+        expect(diffNum).toBeLessThanOrEqual(expectedDiffNum);
+
+        expect(similarity).toBeGreaterThanOrEqual(expectedSimilarity);
+
+        // Similarity of two strings: 1 - editDistance / max(length), so 1 means identical.
+        // Returns 0 when either string is empty or missing.
+        function similar(expected, actual) {
+            if (!expected || !actual) {
+                return 0;
+            }
+            const n = expected.length;
+            const m = actual.length;
+            const length = Math.max(n, m);
+            // data[i][j] = edit distance between the first i chars of expected and the first j chars of actual
+            const data = [];
+            for (let i = 0; i <= n; i++) {
+                data[i] = [];
+                data[i][0] = i;
+            }
+            for (let j = 0; j <= m; j++) {
+                data[0][j] = j;
+            }
+            for (let i = 1; i <= n; i++) {
+                const si = expected.charAt(i - 1);
+                for (let j = 1; j <= m; j++) {
+                    const cost = si === actual.charAt(j - 1) ? 0 : 1;
+                    data[i][j] = Math.min(data[i - 1][j] + 1, data[i][j - 1] + 1, data[i - 1][j - 1] + cost);
+                }
+            }
+            return (1 - data[n][m] / length);
+        }
+    });
+});
diff --git a/test_tipc/web/jest-puppeteer.config.js b/test_tipc/web/jest-puppeteer.config.js
new file mode 100644
index 0000000000000000000000000000000000000000..ac60eea6b6c8b6c189a997fb8bc8ed0f5d74e606
--- /dev/null
+++ b/test_tipc/web/jest-puppeteer.config.js
@@ -0,0 +1,14 @@
+// jest-puppeteer.config.js
+module.exports = {
+    launch: {
+        headless: false,
+        product: 'chrome'
+    },
+    browserContext: 'default',
+    server: {
+        command: 'python3 -m http.server 9811',
+        port: 9811,
+        launchTimeout: 10000,
+        debug: true
+    }
+};
diff --git a/test_tipc/web/jest.config.js b/test_tipc/web/jest.config.js
new file mode 100644
index 0000000000000000000000000000000000000000..aed1573efa527bab488498331aa1683cdb42929a
--- /dev/null
+++ b/test_tipc/web/jest.config.js
@@ -0,0 +1,111 @@
+// For a detailed explanation regarding each configuration property and type check, visit:
+// https://jestjs.io/docs/en/configuration.html
+
+module.exports = {
+    preset: 'jest-puppeteer',
+    // All imported modules in your tests should be mocked automatically
+    // automock: false,
+
+    // Automatically clear mock calls and instances between every test
+    clearMocks: true,
+
+    // An object that configures minimum threshold enforcement for coverage results
+    // coverageThreshold: undefined,
+
+    // A set of global variables that need to be available in all test environments
+    globals: {
+        PATH: 'http://localhost:9811'
+    },
+
+    // The maximum amount of workers used to run your tests. Can be specified as % or a number. E.g. maxWorkers: 10% will use 10% of your CPU amount + 1 as the maximum worker number. maxWorkers: 2 will use a maximum of 2 workers.
+    // maxWorkers: "50%",
+
+    // An array of directory names to be searched recursively up from the requiring module's location
+    // moduleDirectories: [
+    //   "node_modules"
+    // ],
+
+    // An array of file extensions your modules use
+    moduleFileExtensions: [
+        'js',
+        'json',
+        'jsx',
+        'ts',
+        'tsx',
+        'node'
+    ],
+
+
+    // The root directory that Jest should scan for tests and modules within
+    // rootDir: undefined,
+
+    // A list of paths to directories that Jest should use to search for files in
+    roots: [
+        ''
+    ],
+
+    // Allows you to use a custom runner instead of Jest's default test runner
+    // runner: "jest-runner",
+
+    // The paths to modules that run some code to configure or set up the testing environment before each test
+    // setupFiles: [],
+
+    // A list of paths to modules that run some code to configure or set up the testing framework before each test
+    // setupFilesAfterEnv: [],
+
+    // The number of seconds after which a test is considered as slow and reported as such in the results.
+    // slowTestThreshold: 5,
+
+    // A list of paths to snapshot serializer modules Jest should use for snapshot testing
+    // snapshotSerializers: [],
+
+    // The test environment that will be used for testing
+    // testEnvironment: 'jsdom',
+
+    // Options that will be passed to the testEnvironment
+    // testEnvironmentOptions: {},
+
+    // An array of regexp pattern strings that are matched against all test paths, matched tests are skipped
+    testPathIgnorePatterns: [
+        '/node_modules/'
+    ],
+
+    // The regexp pattern or array of patterns that Jest uses to detect test files
+    testRegex: '.(.+)\\.test\\.(js|ts)$',
+
+    // This option allows the use of a custom results processor
+    // testResultsProcessor: undefined,
+
+    // This option allows use of a custom test runner
+    // testRunner: "jest-circus/runner",
+
+    // This option sets the URL for the jsdom environment. It is reflected in properties such as location.href
+    testURL: 'http://localhost:9898/',
+
+    // Setting this value to "fake" allows the use of fake timers for functions such as "setTimeout"
+    // timers: "real",
+
+    // A map from regular expressions to paths to transformers
+    transform: {
+        '^.+\\.js$': 'babel-jest'
+    },
+
+    // An array of regexp pattern strings that are matched against all source file paths, matched files will skip transformation
+    transformIgnorePatterns: [
+        '/node_modules/',
+        '\\.pnp\\.[^\\/]+$'
+    ],
+
+    // An array of regexp pattern strings that are matched against all modules before the module loader will automatically return a mock for them
+    // unmockedModulePathPatterns: undefined,
+
+    // Indicates whether each individual test should be reported during the run
+    verbose: true,
+
+    // An array of regexp patterns that are matched against all source file paths before re-running tests in watch mode
+    // watchPathIgnorePatterns: [],
+
+    // Whether to use watchman for file crawling
+    // watchman: true,
+    testTimeout: 50000
+};
diff --git a/test_tipc/web/test.jpg b/test_tipc/web/test.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..60682be64f34058a3c388b146873d972fa15dd8a
Binary files /dev/null and b/test_tipc/web/test.jpg differ