diff --git a/deploy/paddleocr-go/README.md b/deploy/paddleocr-go/README.md index b4f547c3cbb1c826c49ef1311b9931f170a74ce2..67538b78fece893e30185a9f737e6abebe1d9e9f 100644 --- a/deploy/paddleocr-go/README.md +++ b/deploy/paddleocr-go/README.md @@ -261,7 +261,7 @@ func (ocr *OCRSystem) StartServer(port string) 当前给定的配置文件`config/conf.yaml`中,包含了默认的OCR预测配置参数,可根据个人需要更改相关参数。 -比如,将`use_gpu`改为`false`,使用CPU执行预测;将`det_model_dir`, `rec_model_dir`, `cls_model_dir`都更改为自己的本地模型路径,也或者是更改字典`rec_char_dict_path`的路径。配置参数包含了预测引擎、检测模型、检测阈值、方向分类模型、识别模型及阈值的相关参数,具体参数的意义可参见[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/whl.md#%E5%8F%82%E6%95%B0%E8%AF%B4%E6%98%8E)。 +比如,将`use_gpu`改为`false`,使用CPU执行预测;将`det_model_dir`, `rec_model_dir`, `cls_model_dir`都更改为自己的本地模型路径,也或者是更改字典`rec_char_dict_path`的路径,这四个路径如果配置http链接,会自动下载到本地目录。另外,配置参数包含了预测引擎、检测模型、检测阈值、方向分类模型、识别模型及阈值的相关参数,具体参数的意义可参见[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/doc/doc_ch/whl.md#%E5%8F%82%E6%95%B0%E8%AF%B4%E6%98%8E)。 ### 3.2 编译预测demo diff --git a/deploy/paddleocr-go/config/conf.yaml b/deploy/paddleocr-go/config/conf.yaml index c6b5b7914bfc51cb0e14a614bf696c34083097cf..d2892defcf5989a1abcff1240b5e6da31c7a3ba9 100644 --- a/deploy/paddleocr-go/config/conf.yaml +++ b/deploy/paddleocr-go/config/conf.yaml @@ -30,7 +30,7 @@ rec_image_shape: [3, 32, 320] rec_char_type: "ch" rec_batch_num: 30 max_text_length: 25 -rec_char_dict_path: "config/ppocr_keys_v1.txt" +rec_char_dict_path: "https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/develop/ppocr/utils/ppocr_keys_v1.txt" use_space_char: true # params for text classifier diff --git a/deploy/paddleocr-go/ocr/default_args.go b/deploy/paddleocr-go/ocr/default_args.go index 6afd69e7fa01cd9227c3679a3d07dc781e8a2638..5ccad4558975b907f71011bd58715c8ed06d0c05 100644 --- a/deploy/paddleocr-go/ocr/default_args.go +++ b/deploy/paddleocr-go/ocr/default_args.go @@ -28,7 +28,7 @@ var ( "rec_char_type": "ch", "rec_batch_num": 30, "max_text_length": 25, - "rec_char_dict_path": "config/ppocr_keys_v1.txt", + "rec_char_dict_path": "https://raw.githubusercontent.com/PaddlePaddle/PaddleOCR/develop/ppocr/utils/ppocr_keys_v1.txt", "use_space_char": true, "use_angle_cls": false, diff --git a/deploy/paddleocr-go/ocr/ocr_cls.go b/deploy/paddleocr-go/ocr/ocr_cls.go index c46d6e8e55a6090dfd6839badc1963e8c50d18ea..24c9427bc1a7e19ffb2a50f9099c9d11dfef689b 100644 --- a/deploy/paddleocr-go/ocr/ocr_cls.go +++ b/deploy/paddleocr-go/ocr/ocr_cls.go @@ -2,6 +2,7 @@ package ocr import ( "log" + "os" "time" "github.com/LKKlein/gocv" @@ -34,7 +35,8 @@ func NewTextClassifier(modelDir string, args map[string]interface{}) *TextClassi shape: shapes, } if checkModelExists(modelDir) { - modelDir, _ = downloadModel("./inference/cls", modelDir) + home, _ := os.UserHomeDir() + modelDir, _ = downloadModel(home+"/.paddleocr/cls", modelDir) } else { log.Panicf("cls model path: %v not exist! Please check!", modelDir) } diff --git a/deploy/paddleocr-go/ocr/ocr_det.go b/deploy/paddleocr-go/ocr/ocr_det.go index 4773d5bed17887c91ac21054b162deae9ce962ce..e6f6656104881054a9f4adc385b4cbca76d5a17e 100644 --- a/deploy/paddleocr-go/ocr/ocr_det.go +++ b/deploy/paddleocr-go/ocr/ocr_det.go @@ -2,6 +2,7 @@ package ocr import ( "log" + "os" "time" "github.com/LKKlein/gocv" @@ -25,7 +26,8 @@ func NewDBDetector(modelDir string, args map[string]interface{}) *DBDetector { postProcess: NewDBPostProcess(thresh, boxThresh, unClipRatio), } if checkModelExists(modelDir) { - modelDir, _ = downloadModel("./inference/det", modelDir) + home, _ := os.UserHomeDir() + modelDir, _ = downloadModel(home+"/.paddleocr/det", modelDir) } else { log.Panicf("det model path: %v not exist! Please check!", modelDir) } diff --git a/deploy/paddleocr-go/ocr/ocr_rec.go b/deploy/paddleocr-go/ocr/ocr_rec.go index 661718c87778030bf6efb9d03c51d45a4fb0e30c..d0d0ffa5521044b126d9ea4349f076a83a89f5d2 100644 --- a/deploy/paddleocr-go/ocr/ocr_rec.go +++ b/deploy/paddleocr-go/ocr/ocr_rec.go @@ -2,6 +2,7 @@ package ocr import ( "log" + "os" "time" "github.com/LKKlein/gocv" @@ -37,7 +38,8 @@ func NewTextRecognizer(modelDir string, args map[string]interface{}) *TextRecogn labels: labels, } if checkModelExists(modelDir) { - modelDir, _ = downloadModel("./inference/rec/ch", modelDir) + home, _ := os.UserHomeDir() + modelDir, _ = downloadModel(home+"/.paddleocr/rec/ch", modelDir) } else { log.Panicf("rec model path: %v not exist! Please check!", modelDir) } diff --git a/deploy/paddleocr-go/ocr/utils.go b/deploy/paddleocr-go/ocr/utils.go index 0603d2bba54e9eb9cd744121ca2aaa4df144cee1..16c3cec8328f05614ea8e45003531b7cd006af50 100644 --- a/deploy/paddleocr-go/ocr/utils.go +++ b/deploy/paddleocr-go/ocr/utils.go @@ -9,7 +9,6 @@ import ( "os" "path" "path/filepath" - "regexp" "strings" "github.com/LKKlein/gocv" @@ -154,8 +153,10 @@ func isPathExist(path string) bool { func downloadModel(modelDir, modelPath string) (string, error) { if modelPath != "" && (strings.HasPrefix(modelPath, "http://") || strings.HasPrefix(modelPath, "ftp://") || strings.HasPrefix(modelPath, "https://")) { - reg := regexp.MustCompile("^(http|https|ftp)://[^/]+/(.+)") - suffix := reg.FindStringSubmatch(modelPath)[2] + if checkModelExists(modelDir) { + return modelDir, nil + } + _, suffix := path.Split(modelPath) outPath := filepath.Join(modelDir, suffix) outDir := filepath.Dir(outPath) if !isPathExist(outDir) { @@ -168,16 +169,13 @@ func downloadModel(modelDir, modelPath string) (string, error) { return "", err } } - if strings.HasSuffix(outPath, ".tar") { - _, f := path.Split(suffix) - nextDir := strings.TrimSuffix(f, ".tar") - finalPath := modelDir + "/" + nextDir - if !checkModelExists(finalPath) { - unTar(modelDir, outPath) - } - return finalPath, nil + + if strings.HasSuffix(outPath, ".tar") && !checkModelExists(modelDir) { + unTar(modelDir, outPath) + os.Remove(outPath) + return modelDir, nil } - return outPath, nil + return modelDir, nil } return modelPath, nil } @@ -202,15 +200,16 @@ func unTar(dst, src string) (err error) { continue } - dstFileDir := filepath.Join(dst, hdr.Name) + var dstFileDir string + if strings.Contains(hdr.Name, "model") { + dstFileDir = filepath.Join(dst, "model") + } else if strings.Contains(hdr.Name, "params") { + dstFileDir = filepath.Join(dst, "params") + } switch hdr.Typeflag { case tar.TypeDir: - if b := isPathExist(dstFileDir); !b { - if err := os.MkdirAll(dstFileDir, 0775); err != nil { - return err - } - } + continue case tar.TypeReg: file, err := os.OpenFile(dstFileDir, os.O_CREATE|os.O_RDWR, os.FileMode(hdr.Mode)) if err != nil { @@ -227,10 +226,24 @@ func unTar(dst, src string) (err error) { return nil } -func readLines2StringSlice(path string) []string { - content, err := ioutil.ReadFile(path) +func readLines2StringSlice(filepath string) []string { + if strings.HasPrefix(filepath, "http://") || strings.HasPrefix(filepath, "https://") { + home, _ := os.UserHomeDir() + dir := home + "/.paddleocr/rec/" + _, suffix := path.Split(filepath) + f := dir + suffix + if !isPathExist(f) { + err := downloadFile(f, filepath) + if err != nil { + log.Println("download ppocr key file error!") + return nil + } + } + filepath = f + } + content, err := ioutil.ReadFile(filepath) if err != nil { - log.Println("read file error!") + log.Println("read ppocr key file error!") return nil } lines := strings.Split(string(content), "\n")