update doc for whl

a5df6c34 · andyjpaddle · 59812f6b · a5df6c34 · a5df6c34 · a5df6c34
5 changed file
--- a/doc/doc_ch/quickstart.md
+++ b/doc/doc_ch/quickstart.md
@@ -75,6 +75,11 @@ cd /path/to/ppocr_img
  ......
  ```

+  此外，paddleocr也支持输入pdf文件，并且可以通过指定参数`page_num`来控制推理前面几页，默认为0，表示推理所有页。
+  ```bash
+  paddleocr --image_dir ./xxx.pdf --use_angle_cls true --use_gpu false --page_num 2
+  ```
+
 - 单独使用检测：设置`--rec`为`false`

  ```bash
@@ -165,12 +170,14 @@ from paddleocr import PaddleOCR, draw_ocr
 ocr = PaddleOCR(use_angle_cls=True, lang="ch")  # need to run only once to download and load model into memory
 img_path = './imgs/11.jpg'
 result = ocr.ocr(img_path, cls=True)
-for line in result:
-    print(line)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)

 # 显示结果
 from PIL import Image
-
+result = result[0]
 image = Image.open(img_path).convert('RGB')
 boxes = [line[0] for line in result]
 txts = [line[1][0] for line in result]
@@ -196,6 +203,50 @@ im_show.save('result.jpg')

 <a name="3"></a>

+如果输入是PDF文件，那么可以参考下面代码进行可视化
+
+```python
+from paddleocr import PaddleOCR, draw_ocr
+
+# Paddleocr目前支持的多语言语种可以通过修改lang参数进行切换
+# 例如`ch`, `en`, `fr`, `german`, `korean`, `japan`
+ocr = PaddleOCR(use_angle_cls=True, lang="ch"， page_num=2)  # need to run only once to download and load model into memory
+img_path = './xxx.pdf'
+result = ocr.ocr(img_path, cls=True)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)
+
+# 显示结果
+import fitz
+from PIL import Image
+import cv2
+import numpy as np
+imgs = []
+with fitz.open(img_path) as pdf:
+    for pg in range(0, pdf.pageCount):
+        page = pdf[pg]
+        mat = fitz.Matrix(2, 2)
+        pm = page.getPixmap(matrix=mat, alpha=False)
+        # if width or height > 2000 pixels, don't enlarge the image
+        if pm.width > 2000 or pm.height > 2000:
+            pm = page.getPixmap(matrix=fitz.Matrix(1, 1), alpha=False)
+
+        img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
+        img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+        imgs.append(img)
+for idx in range(len(result)):
+    res = result[idx]
+    image = imgs[idx]
+    boxes = [line[0] for line in res]
+    txts = [line[1][0] for line in res]
+    scores = [line[1][1] for line in res]
+    im_show = draw_ocr(image, boxes, txts, scores, font_path='doc/fonts/simfang.ttf')
+    im_show = Image.fromarray(im_show)
+    im_show.save('result_page_{}.jpg'.format(idx))
+```
+
 ## 3. 小结

 通过本节内容，相信您已经熟练掌握PaddleOCR whl包的使用方法并获得了初步效果。

--- a/doc/doc_ch/whl.md
+++ b/doc/doc_ch/whl.md
@@ -33,12 +33,14 @@ from paddleocr import PaddleOCR, draw_ocr
 ocr = PaddleOCR(use_angle_cls=True, lang="ch")  # need to run only once to download and load model into memory
 img_path = 'PaddleOCR/doc/imgs/11.jpg'
 result = ocr.ocr(img_path, cls=True)
-for line in result:
-    print(line)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)

 # 显示结果
 from PIL import Image
-
+result = result[0]
 image = Image.open(img_path).convert('RGB')
 boxes = [line[0] for line in result]
 txts = [line[1][0] for line in result]
@@ -71,12 +73,14 @@ from paddleocr import PaddleOCR, draw_ocr
 ocr = PaddleOCR()  # need to run only once to download and load model into memory
 img_path = 'PaddleOCR/doc/imgs/11.jpg'
 result = ocr.ocr(img_path, cls=False)
-for line in result:
-    print(line)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)

 # 显示结果
 from PIL import Image
-
+result = result[0]
 image = Image.open(img_path).convert('RGB')
 boxes = [line[0] for line in result]
 txts = [line[1][0] for line in result]
@@ -109,8 +113,10 @@ from paddleocr import PaddleOCR
 ocr = PaddleOCR(use_angle_cls=True)  # need to run only once to download and load model into memory
 img_path = 'PaddleOCR/doc/imgs_words/ch/word_1.jpg'
 result = ocr.ocr(img_path, det=False, cls=True)
-for line in result:
-    print(line)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)
 ```

 结果是一个list，每个item只包含识别结果和识别置信度
@@ -127,12 +133,14 @@ from paddleocr import PaddleOCR, draw_ocr
 ocr = PaddleOCR()  # need to run only once to download and load model into memory
 img_path = 'PaddleOCR/doc/imgs/11.jpg'
 result = ocr.ocr(img_path, rec=False)
-for line in result:
-    print(line)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)

 # 显示结果
 from PIL import Image
-
+result = result[0]
 image = Image.open(img_path).convert('RGB')
 im_show = draw_ocr(image, result, txts=None, scores=None, font_path='/path/to/PaddleOCR/doc/fonts/simfang.ttf')
 im_show = Image.fromarray(im_show)
@@ -163,8 +171,10 @@ from paddleocr import PaddleOCR
 ocr = PaddleOCR()  # need to run only once to download and load model into memory
 img_path = 'PaddleOCR/doc/imgs_words/ch/word_1.jpg'
 result = ocr.ocr(img_path, det=False)
-for line in result:
-    print(line)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)
 ```

 结果是一个list，每个item只包含识别结果和识别置信度
@@ -181,8 +191,10 @@ from paddleocr import PaddleOCR
 ocr = PaddleOCR(use_angle_cls=True)  # need to run only once to download and load model into memory
 img_path = 'PaddleOCR/doc/imgs_words/ch/word_1.jpg'
 result = ocr.ocr(img_path, det=False, rec=False, cls=True)
-for line in result:
-    print(line)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)
 ```

 结果是一个list，每个item只包含分类结果和分类置信度
@@ -212,6 +224,11 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true
 ......
 ```

+此外，paddleocr也支持输入pdf文件，并且可以通过指定参数`page_num`来控制推理前面几页，默认为0，表示推理所有页。
+```bash
+paddleocr --image_dir ./xxx.pdf --use_angle_cls true --use_gpu false --page_num 2
+```
+
 * 检测+识别

 ```bash
@@ -290,12 +307,14 @@ ocr = PaddleOCR(det_model_dir='{your_det_model_dir}', rec_model_dir='{your_rec_m
                use_angle_cls=True)
 img_path = 'PaddleOCR/doc/imgs/11.jpg'
 result = ocr.ocr(img_path, cls=True)
-for line in result:
-    print(line)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)

 # 显示结果
 from PIL import Image
-
+result = result[0]
 image = Image.open(img_path).convert('RGB')
 boxes = [line[0] for line in result]
 txts = [line[1][0] for line in result]
@@ -325,12 +344,14 @@ from paddleocr import PaddleOCR, draw_ocr, download_with_progressbar
 ocr = PaddleOCR(use_angle_cls=True, lang="ch")  # need to run only once to download and load model into memory
 img_path = 'http://n.sinaimg.cn/ent/transform/w630h933/20171222/o111-fypvuqf1838418.jpg'
 result = ocr.ocr(img_path, cls=True)
-for line in result:
-    print(line)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)

 # 显示结果
 from PIL import Image
-
+result = result[0]
 download_with_progressbar(img_path, 'tmp.jpg')
 image = Image.open('tmp.jpg').convert('RGB')
 boxes = [line[0] for line in result]
@@ -362,12 +383,14 @@ img_path = 'PaddleOCR/doc/imgs/11.jpg'
 img = cv2.imread(img_path)
 # img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY), 如果你自己训练的模型支持灰度图，可以将这句话的注释取消
 result = ocr.ocr(img, cls=True)
-for line in result:
-    print(line)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)

 # 显示结果
 from PIL import Image
-
+result = result[0]
 image = Image.open(img_path).convert('RGB')
 boxes = [line[0] for line in result]
 txts = [line[1][0] for line in result]
@@ -376,14 +399,65 @@ im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc
 im_show = Image.fromarray(im_show)
 im_show.save('result.jpg')
 ```
+## 5 PDF文件作为输入
+- 命令行模式
+
+可以通过指定参数`page_num`来控制推理前面几页，默认为0，表示推理所有页。
+```bash
+paddleocr --image_dir ./xxx.pdf --use_angle_cls true --use_gpu false --page_num 2
+```
+- 代码使用
+
+```python
+from paddleocr import PaddleOCR, draw_ocr

-## 5 参数说明
+# Paddleocr目前支持的多语言语种可以通过修改lang参数进行切换
+# 例如`ch`, `en`, `fr`, `german`, `korean`, `japan`
+ocr = PaddleOCR(use_angle_cls=True, lang="ch"， page_num=2)  # need to run only once to download and load model into memory
+img_path = './xxx.pdf'
+result = ocr.ocr(img_path, cls=True)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)
+
+# 显示结果
+import fitz
+from PIL import Image
+import cv2
+import numpy as np
+imgs = []
+with fitz.open(img_path) as pdf:
+    for pg in range(0, pdf.pageCount):
+        page = pdf[pg]
+        mat = fitz.Matrix(2, 2)
+        pm = page.getPixmap(matrix=mat, alpha=False)
+        # if width or height > 2000 pixels, don't enlarge the image
+        if pm.width > 2000 or pm.height > 2000:
+            pm = page.getPixmap(matrix=fitz.Matrix(1, 1), alpha=False)
+
+        img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
+        img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+        imgs.append(img)
+for idx in range(len(result)):
+    res = result[idx]
+    image = imgs[idx]
+    boxes = [line[0] for line in res]
+    txts = [line[1][0] for line in res]
+    scores = [line[1][1] for line in res]
+    im_show = draw_ocr(image, boxes, txts, scores, font_path='doc/fonts/simfang.ttf')
+    im_show = Image.fromarray(im_show)
+    im_show.save('result_page_{}.jpg'.format(idx))
+```
+
+## 6 参数说明

 | 字段                    | 说明                                                                                                                                                                                                                 | 默认值                  |
 |-------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------|
 | use_gpu                 | 是否使用GPU                                                                                                                                                                                                          | TRUE                    |
 | gpu_mem                 | 初始化占用的GPU内存大小                                                                                                                                                                                              | 8000M                   |
-| image_dir               | 通过命令行调用时执行预测的图片或文件夹路径                                                                                                                                                                           |                         |
+| image_dir               | 通过命令行调用时执行预测的图片或文件夹路径                                                                                                                                                                           |  
+| page_num               | 当输入类型为pdf文件时有效，指定预测前面page_num页，默认预测所有页                     |        0                 |
 | det_algorithm           | 使用的检测算法类型                                                                                                                                                                                                   | DB                      |
 | det_model_dir          |  检测模型所在文件夹。传参方式有两种，1. None: 自动下载内置模型到 `~/.paddleocr/det`；2.自己转换好的inference模型路径，模型路径下必须包含model和params文件 |   None        |
 | det_max_side_len        | 检测算法前向时图片长边的最大尺寸，当长边超出这个值时会将长边resize到这个大小，短边等比例缩放                                                                                                                         | 960                     |

--- a/doc/doc_en/quickstart_en.md
+++ b/doc/doc_en/quickstart_en.md
@@ -86,6 +86,12 @@ If you do not use the provided test image, you can replace the following `--imag
  ......
  ```

+  pdf file is also supported, you can infer the first few pages by using the `page_num` parameter, the default is 0, which means infer all pages
+
+  ```bash
+  paddleocr --image_dir ./xxx.pdf --use_angle_cls true --use_gpu false --page_num 2
+  ```
+
 * Only detection: set `--rec` to `false`

  ```bash
@@ -176,12 +182,15 @@ from paddleocr import PaddleOCR,draw_ocr
 ocr = PaddleOCR(use_angle_cls=True, lang='en') # need to run only once to download and load model into memory
 img_path = './imgs_en/img_12.jpg'
 result = ocr.ocr(img_path, cls=True)
-for line in result:
-    print(line)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)


 # draw result
 from PIL import Image
+result = result[0]
 image = Image.open(img_path).convert('RGB')
 boxes = [line[0] for line in result]
 txts = [line[1][0] for line in result]
@@ -206,6 +215,50 @@ Visualization of results
    <img src="../imgs_results/whl/12_det_rec.jpg" width="800">
 </div>

+If the input is a PDF file, you can refer to the following code for visualization
+
+```python
+from paddleocr import PaddleOCR, draw_ocr
+
+# Paddleocr supports Chinese, English, French, German, Korean and Japanese.
+# You can set the parameter `lang` as `ch`, `en`, `fr`, `german`, `korean`, `japan`
+# to switch the language model in order.
+ocr = PaddleOCR(use_angle_cls=True, lang="ch"， page_num=2)  # need to run only once to download and load model into memory
+img_path = './xxx.pdf'
+result = ocr.ocr(img_path, cls=True)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)
+
+# draw result
+import fitz
+from PIL import Image
+import cv2
+import numpy as np
+imgs = []
+with fitz.open(img_path) as pdf:
+    for pg in range(0, pdf.pageCount):
+        page = pdf[pg]
+        mat = fitz.Matrix(2, 2)
+        pm = page.getPixmap(matrix=mat, alpha=False)
+        # if width or height > 2000 pixels, don't enlarge the image
+        if pm.width > 2000 or pm.height > 2000:
+            pm = page.getPixmap(matrix=fitz.Matrix(1, 1), alpha=False)
+
+        img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
+        img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+        imgs.append(img)
+for idx in range(len(result)):
+    res = result[idx]
+    image = imgs[idx]
+    boxes = [line[0] for line in res]
+    txts = [line[1][0] for line in res]
+    scores = [line[1][1] for line in res]
+    im_show = draw_ocr(image, boxes, txts, scores, font_path='doc/fonts/simfang.ttf')
+    im_show = Image.fromarray(im_show)
+    im_show.save('result_page_{}.jpg'.format(idx))
+```

 <a name="3"></a>


--- a/doc/doc_en/whl_en.md
+++ b/doc/doc_en/whl_en.md
@@ -25,12 +25,14 @@ from paddleocr import PaddleOCR,draw_ocr
 ocr = PaddleOCR(use_angle_cls=True, lang='en') # need to run only once to download and load model into memory
 img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg'
 result = ocr.ocr(img_path, cls=True)
-for line in result:
-    print(line)
-
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)

 # draw result
 from PIL import Image
+result = result[0]
 image = Image.open(img_path).convert('RGB')
 boxes = [line[0] for line in result]
 txts = [line[1][0] for line in result]
@@ -60,11 +62,14 @@ from paddleocr import PaddleOCR,draw_ocr
 ocr = PaddleOCR(lang='en') # need to run only once to download and load model into memory
 img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg'
 result = ocr.ocr(img_path, cls=False)
-for line in result:
-    print(line)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)

 # draw result
 from PIL import Image
+result = result[0]
 image = Image.open(img_path).convert('RGB')
 boxes = [line[0] for line in result]
 txts = [line[1][0] for line in result]
@@ -94,8 +99,10 @@ from paddleocr import PaddleOCR
 ocr = PaddleOCR(use_angle_cls=True, lang='en') # need to run only once to load model into memory
 img_path = 'PaddleOCR/doc/imgs_words_en/word_10.png'
 result = ocr.ocr(img_path, det=False, cls=True)
-for line in result:
-    print(line)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)
 ```

 Output will be a list, each item contains recognition text and confidence
@@ -109,12 +116,14 @@ from paddleocr import PaddleOCR,draw_ocr
 ocr = PaddleOCR() # need to run only once to download and load model into memory
 img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg'
 result = ocr.ocr(img_path,rec=False)
-for line in result:
-    print(line)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)

 # draw result
 from PIL import Image
-
+result = result[0]
 image = Image.open(img_path).convert('RGB')
 im_show = draw_ocr(image, result, txts=None, scores=None, font_path='/path/to/PaddleOCR/doc/fonts/simfang.ttf')
 im_show = Image.fromarray(im_show)
@@ -141,8 +150,10 @@ from paddleocr import PaddleOCR
 ocr = PaddleOCR(lang='en') # need to run only once to load model into memory
 img_path = 'PaddleOCR/doc/imgs_words_en/word_10.png'
 result = ocr.ocr(img_path, det=False, cls=False)
-for line in result:
-    print(line)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)
 ```

 Output will be a list, each item contains recognition text and confidence
@@ -156,8 +167,10 @@ from paddleocr import PaddleOCR
 ocr = PaddleOCR(use_angle_cls=True) # need to run only once to load model into memory
 img_path = 'PaddleOCR/doc/imgs_words_en/word_10.png'
 result = ocr.ocr(img_path, det=False, rec=False, cls=True)
-for line in result:
-    print(line)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)
 ```

 Output will be a list, each item contains classification result and confidence
@@ -185,6 +198,11 @@ Output will be a list, each item contains bounding box, text and recognition con
 ......
 ```

+pdf file is also supported, you can infer the first few pages by using the `page_num` parameter, the default is 0, which means infer all pages
+```bash
+paddleocr --image_dir ./xxx.pdf --use_angle_cls true --use_gpu false --page_num 2
+```
+
 * detection and recognition
 ```bash
 paddleocr --image_dir PaddleOCR/doc/imgs_en/img_12.jpg --lang en
@@ -253,11 +271,14 @@ from paddleocr import PaddleOCR,draw_ocr
 ocr = PaddleOCR(det_model_dir='{your_det_model_dir}', rec_model_dir='{your_rec_model_dir}', rec_char_dict_path='{your_rec_char_dict_path}', cls_model_dir='{your_cls_model_dir}', use_angle_cls=True)
 img_path = 'PaddleOCR/doc/imgs_en/img_12.jpg'
 result = ocr.ocr(img_path, cls=True)
-for line in result:
-    print(line)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)

 # draw result
 from PIL import Image
+result = result[0]
 image = Image.open(img_path).convert('RGB')
 boxes = [line[0] for line in result]
 txts = [line[1][0] for line in result]
@@ -283,11 +304,14 @@ from paddleocr import PaddleOCR, draw_ocr
 ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
 img_path = 'http://n.sinaimg.cn/ent/transform/w630h933/20171222/o111-fypvuqf1838418.jpg'
 result = ocr.ocr(img_path, cls=True)
-for line in result:
-    print(line)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)

 # show result
 from PIL import Image
+result = result[0]
 image = Image.open(img_path).convert('RGB')
 boxes = [line[0] for line in result]
 txts = [line[1][0] for line in result]
@@ -312,12 +336,14 @@ img_path = 'PaddleOCR/doc/imgs/11.jpg'
 img = cv2.imread(img_path)
 # img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY), If your own training model supports grayscale images, you can uncomment this line
 result = ocr.ocr(img_path, cls=True)
-for line in result:
-    print(line)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)

 # show result
 from PIL import Image
-
+result = result[0]
 download_with_progressbar(img_path, 'tmp.jpg')
 image = Image.open('tmp.jpg').convert('RGB')
 boxes = [line[0] for line in result]
@@ -327,15 +353,66 @@ im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc
 im_show = Image.fromarray(im_show)
 im_show.save('result.jpg')
 ```
+## 5 PDF file
+- Use by command line
+
+you can infer the first few pages by using the `page_num` parameter, the default is 0, which means infer all pages
+```bash
+paddleocr --image_dir ./xxx.pdf --use_angle_cls true --use_gpu false --page_num 2
+```
+- Use by code

+```python
+from paddleocr import PaddleOCR, draw_ocr
+
+# Paddleocr supports Chinese, English, French, German, Korean and Japanese.
+# You can set the parameter `lang` as `ch`, `en`, `fr`, `german`, `korean`, `japan`
+# to switch the language model in order.
+ocr = PaddleOCR(use_angle_cls=True, lang="ch"， page_num=2)  # need to run only once to download and load model into memory
+img_path = './xxx.pdf'
+result = ocr.ocr(img_path, cls=True)
+for idx in range(len(result)):
+    res = result[idx]
+    for line in res:
+        print(line)

-## 5 Parameter Description
+# draw result
+import fitz
+from PIL import Image
+import cv2
+import numpy as np
+imgs = []
+with fitz.open(img_path) as pdf:
+    for pg in range(0, pdf.pageCount):
+        page = pdf[pg]
+        mat = fitz.Matrix(2, 2)
+        pm = page.getPixmap(matrix=mat, alpha=False)
+        # if width or height > 2000 pixels, don't enlarge the image
+        if pm.width > 2000 or pm.height > 2000:
+            pm = page.getPixmap(matrix=fitz.Matrix(1, 1), alpha=False)
+
+        img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
+        img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+        imgs.append(img)
+for idx in range(len(result)):
+    res = result[idx]
+    image = imgs[idx]
+    boxes = [line[0] for line in res]
+    txts = [line[1][0] for line in res]
+    scores = [line[1][1] for line in res]
+    im_show = draw_ocr(image, boxes, txts, scores, font_path='doc/fonts/simfang.ttf')
+    im_show = Image.fromarray(im_show)
+    im_show.save('result_page_{}.jpg'.format(idx))
+```
+
+## 6 Parameter Description

 | Parameter                    | Description                                                                                                                                                                                                                 | Default value                  |
 |-------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------|
 | use_gpu                 | use GPU or not                                                                                                                                                                                                          | TRUE                    |
 | gpu_mem                 | GPU memory size used for initialization                                                                                                                                                                                              | 8000M                   |
 | image_dir               | The images path or folder path for predicting when used by the command line                                                                                                                                                                           |                         |
+| page_num               | Valid when the input type is pdf file, specify to predict the previous page_num pages, all pages are predicted by default                                                                                                                                                                           |          0               |
 | det_algorithm           | Type of detection algorithm selected                                                                                                                                                                                                   | DB                      |
 | det_model_dir           | the text detection inference model folder. There are two ways to transfer parameters, 1. None: Automatically download the built-in model to `~/.paddleocr/det`; 2. The path of the inference model converted by yourself, the model and params files must be included in the model path | None           |
 | det_max_side_len        | The maximum size of the long side of the image. When the long side exceeds this value, the long side will be resized to this size, and the short side will be scaled proportionally                                                                                                                         | 960                     |

--- a/paddleocr.py
+++ b/paddleocr.py
@@ -47,7 +47,7 @@ __all__ = [
 ]

 SUPPORT_DET_MODEL = ['DB']
-VERSION = '2.6.0.1'
+VERSION = '2.6.0.2'
 SUPPORT_REC_MODEL = ['CRNN', 'SVTR_LCNet']
 BASE_DIR = os.path.expanduser("~/.paddleocr/")

@@ -428,8 +428,8 @@ def check_img(img):
            download_with_progressbar(img, 'tmp.jpg')
            img = 'tmp.jpg'
        image_file = img
-        img, flag, _ = check_and_read(image_file)
-        if not flag:
+        img, flag_gif, flag_pdf = check_and_read(image_file)
+        if not flag_gif and not flag_pdf:
            with open(image_file, 'rb') as f:
                img = img_decode(f.read())
        if img is None:
@@ -499,6 +499,7 @@ class PaddleOCR(predict_system.TextSystem):
        logger.debug(params)
        # init det_model and rec_model
        super().__init__(params)
+        self.page_num = params.page_num

    def ocr(self, img, det=True, rec=True, cls=True):
        """
@@ -519,24 +520,43 @@ class PaddleOCR(predict_system.TextSystem):
            )

        img = check_img(img)
-
+        # for infer pdf file
+        if isinstance(img, list):
+            if self.page_num > len(img) or self.page_num == 0:
+                self.page_num = len(img)
+            imgs = img[:self.page_num]
+        else:
+            imgs = [img]
        if det and rec:
-            dt_boxes, rec_res, _ = self.__call__(img, cls)
-            return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
+            ocr_res = []
+            for idx, img in enumerate(imgs):
+                dt_boxes, rec_res, _ = self.__call__(img, cls)
+                tmp_res = [[box.tolist(), res]
+                           for box, res in zip(dt_boxes, rec_res)]
+                ocr_res.append(tmp_res)
+            return ocr_res
        elif det and not rec:
-            dt_boxes, elapse = self.text_detector(img)
-            if dt_boxes is None:
-                return None
-            return [box.tolist() for box in dt_boxes]
+            ocr_res = []
+            for idx, img in enumerate(imgs):
+                dt_boxes, elapse = self.text_detector(img)
+                tmp_res = [box.tolist() for box in dt_boxes]
+                ocr_res.append(tmp_res)
+            return ocr_res
        else:
-            if not isinstance(img, list):
-                img = [img]
-            if self.use_angle_cls and cls:
-                img, cls_res, elapse = self.text_classifier(img)
-                if not rec:
-                    return cls_res
-            rec_res, elapse = self.text_recognizer(img)
-            return rec_res
+            ocr_res = []
+            cls_res = []
+            for idx, img in enumerate(imgs):
+                if not isinstance(img, list):
+                    img = [img]
+                if self.use_angle_cls and cls:
+                    img, cls_res_tmp, elapse = self.text_classifier(img)
+                    if not rec:
+                        cls_res.append(cls_res_tmp)
+                rec_res, elapse = self.text_recognizer(img)
+                ocr_res.append(rec_res)
+            if not rec:
+                return cls_res
+            return ocr_res


 class PPStructure(StructureSystem):
@@ -632,8 +652,10 @@ def main():
                                rec=args.rec,
                                cls=args.use_angle_cls)
            if result is not None:
-                for line in result:
-                    logger.info(line)
+                for idx in range(len(result)):
+                    res = result[idx]
+                    for line in res:
+                        logger.info(line)
        elif args.type == 'structure':
            img, flag_gif, flag_pdf = check_and_read(img_path)
            if not flag_gif and not flag_pdf:
@@ -681,7 +703,7 @@ def main():
                        "error in layout recovery image:{}, err msg: {}".format(
                            img_name, ex))
                    continue
-            
+
            for item in all_res:
                item.pop('img')
                item.pop('res')