Unverified · Commit 9059f747 authored by andyj, committed by GitHub

support new version of numpy (#8921)

* add finetune en doc & test=document_fix

* fix dead link & test=document_fix

* fix dead link & test=document_fix

* update check img

* fix det res dtype

* update args default type & test=document_fix

* fix numpy version

* support numpy1.24.0
Parent 955c46e0
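
This commit updates the codebase for NumPy 1.24, which removed the builtin-shadowing scalar aliases (`np.bool`, `np.int`, `np.float`) that had been deprecated since NumPy 1.20; the hunks below replace them with explicit dtypes. A minimal sketch of the substitutions (not part of the commit; the variable names are illustrative):

```python
# On NumPy >= 1.24, np.bool / np.int / np.float raise AttributeError,
# so the diff switches to explicit dtypes.
import numpy as np

ignore_tags = np.array([True, False], dtype=np.bool_)    # was: dtype=np.bool
xmin = np.clip(np.floor(3.7), 0, 100).astype(np.int32)   # was: .astype(np.int)
output_data = np.zeros([7760], np.float32)               # was: np.zeros([7760], np.float)
```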
......@@ -223,7 +223,7 @@ AIStudio项目链接:[快速构建卡证类OCR](https://aistudio.baidu.com/ais
2) Download and extract the pretrained model. If you want to use a different model, you can pick a suitable one from the model zoo.
```
!wget -P work/pre_trained/ https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar
!tar -vxf /home/aistudio/work/pre_trained/ch_PP-OCRv3_det_distill_train.tar -C /home/aistudio/work/pre_trained
```
3) Install the required dependencies
......@@ -275,7 +275,7 @@ AIStudio项目链接:[快速构建卡证类OCR](https://aistudio.baidu.com/ais
```python
class DetLabelEncode(object):
# Modify the detection-label encoding: add a num_classes parameter, override the init method, and read the class labels
def __init__(self, label_list, num_classes=8, **kwargs):
......@@ -315,11 +315,11 @@ class DetLabelEncode(object):
classes.append(int(self.label_list.index(txt)))
if len(boxes) == 0:
return None
boxes = self.expand_points_num(boxes)
boxes = np.array(boxes, dtype=np.float32)
txt_tags = np.array(txt_tags, dtype=np.bool)
txt_tags = np.array(txt_tags, dtype=np.bool_)
classes = classes
data['polys'] = boxes
data['texts'] = txts
......@@ -410,10 +410,10 @@ class MakeShrinkMap(object):
data['shrink_map'] = gt
if self.num_classes > 1:
data['class_mask'] = gt_class
data['shrink_mask'] = mask
return data
```
......@@ -634,10 +634,10 @@ class DBPostProcess(object):
'''
h, w = bitmap.shape[:2]
box = _box.copy()
xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1)
xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int), 0, w - 1)
ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int), 0, h - 1)
ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int), 0, h - 1)
xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1)
xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1)
ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1)
ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1)
mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
box[:, 0] = box[:, 0] - xmin
......@@ -752,11 +752,11 @@ class DBPostProcess(object):
Other commands:
```
!python /home/aistudio/work/PaddleOCR/tools/eval.py -c /home/aistudio/work/PaddleOCR/configs/det/det_mv3_db.yml
!python /home/aistudio/work/PaddleOCR/tools/infer_det.py -c /home/aistudio/work/PaddleOCR/configs/det/det_mv3_db.yml
```
Model inference
```
!python /home/aistudio/work/PaddleOCR/tools/infer/predict_det.py --image_dir="/home/aistudio/work/test_img/" --det_model_dir="/home/aistudio/work/PaddleOCR/output/infer"
```
## 5 Summary
......
......@@ -24,6 +24,7 @@ import math
from PIL import Image
import numpy as np
def resize_norm_img(img, image_shape, padding=True):
imgC, imgH, imgW = image_shape
h = img.shape[0]
......@@ -61,9 +62,8 @@ def create_header_file(name, tensor_name, tensor_data, output_path):
raw_path = file_path.with_suffix(".h").resolve()
with open(raw_path, "w") as header_file:
header_file.write(
"\n"
+ f"const size_t {tensor_name}_len = {tensor_data.size};\n"
+ f'__attribute__((section(".data.tvm"), aligned(16))) float {tensor_name}[] = '
"\n" + f"const size_t {tensor_name}_len = {tensor_data.size};\n" +
f'__attribute__((section(".data.tvm"), aligned(16))) float {tensor_name}[] = '
)
header_file.write("{")
......@@ -80,22 +80,21 @@ def create_headers(image_name):
# Resize image to 32x320
img = cv2.imread(img_path)
img = resize_norm_img(img, [3,32,320])
img = resize_norm_img(img, [3, 32, 320])
img_data = img.astype("float32")
# # Add the batch dimension, as we are expecting 4-dimensional input: NCHW.
img_data = np.expand_dims(img_data, axis=0)
# Create input header file
create_header_file("inputs", "input", img_data, "./include")
# Create output header file
output_data = np.zeros([7760], np.float)
output_data = np.zeros([7760], np.float32)
create_header_file(
"outputs",
"output",
output_data,
"./include",
)
"./include", )
if __name__ == "__main__":
......
......@@ -122,7 +122,7 @@ class OCRDet(hub.Module):
rec_res_final = []
for dno in range(len(dt_boxes)):
rec_res_final.append({
'text_region': dt_boxes[dno].astype(np.int).tolist()
'text_region': dt_boxes[dno].astype(np.int32).tolist()
})
all_results.append(rec_res_final)
return all_results
......
......@@ -130,7 +130,7 @@ class OCRSystem(hub.Module):
rec_res_final.append({
'text': text,
'confidence': float(score),
'text_region': dt_boxes[dno].astype(np.int).tolist()
'text_region': dt_boxes[dno].astype(np.int32).tolist()
})
all_results.append(rec_res_final)
return all_results
......
......@@ -205,7 +205,7 @@ class CVRandomAffine(object):
for x, y in startpoints]
rect = cv2.minAreaRect(np.array(endpoints))
bbox = cv2.boxPoints(rect).astype(dtype=np.int)
bbox = cv2.boxPoints(rect).astype(dtype=np.int32)
max_x, max_y = bbox[:, 0].max(), bbox[:, 1].max()
min_x, min_y = bbox[:, 0].min(), bbox[:, 1].min()
......@@ -234,9 +234,9 @@ class CVRandomPerspective(object):
def get_params(self, width, height, distortion):
offset_h = sample_asym(
distortion * height / 2, size=4).astype(dtype=np.int)
distortion * height / 2, size=4).astype(dtype=np.int32)
offset_w = sample_asym(
distortion * width / 2, size=4).astype(dtype=np.int)
distortion * width / 2, size=4).astype(dtype=np.int32)
topleft = (offset_w[0], offset_h[0])
topright = (width - 1 - offset_w[1], offset_h[1])
botright = (width - 1 - offset_w[2], height - 1 - offset_h[2])
......@@ -256,7 +256,7 @@ class CVRandomPerspective(object):
# TODO: more robust way to crop image
rect = cv2.minAreaRect(endpoints)
bbox = cv2.boxPoints(rect).astype(dtype=np.int)
bbox = cv2.boxPoints(rect).astype(dtype=np.int32)
max_x, max_y = bbox[:, 0].max(), bbox[:, 1].max()
min_x, min_y = bbox[:, 0].min(), bbox[:, 1].min()
min_x, min_y = max(min_x, 0), max(min_y, 0)
......@@ -441,7 +441,8 @@ class SVTRGeometry(object):
self.p = p
self.transforms = []
self.transforms.append(CVRandomRotation(degrees=degrees))
self.transforms.append(CVRandomAffine(
self.transforms.append(
CVRandomAffine(
degrees=degrees, translate=translate, scale=scale, shear=shear))
self.transforms.append(CVRandomPerspective(distortion=distortion))
......@@ -455,4 +456,4 @@ class SVTRGeometry(object):
img = self.transforms[random.randint(0, 2)](img)
return img
else:
return img
\ No newline at end of file
return img
......@@ -208,7 +208,7 @@ class RandomCropFlip:
for polygon in all_polys:
rect = cv2.minAreaRect(polygon.astype(np.int32).reshape(-1, 2))
box = cv2.boxPoints(rect)
box = np.int0(box)
box = np.int64(box)
text_polys.append([box[0], box[1], box[2], box[3]])
polys = np.array(text_polys, dtype=np.int32)
......
......@@ -22,10 +22,12 @@ from numpy.fft import fft
from numpy.linalg import norm
import sys
def vector_slope(vec):
assert len(vec) == 2
return abs(vec[1] / (vec[0] + 1e-8))
class FCENetTargets:
"""Generate the ground truth targets of FCENet: Fourier Contour Embedding
for Arbitrary-Shaped Text Detection.
......@@ -107,7 +109,9 @@ class FCENetTargets:
for i in range(1, n):
current_line_len = i * delta_length
while current_edge_ind + 1 < len(length_cumsum) and current_line_len >= length_cumsum[current_edge_ind + 1]:
while current_edge_ind + 1 < len(
length_cumsum) and current_line_len >= length_cumsum[
current_edge_ind + 1]:
current_edge_ind += 1
current_edge_end_shift = current_line_len - length_cumsum[
......@@ -239,10 +243,9 @@ class FCENetTargets:
head_inds = [head_start, head_end]
tail_inds = [tail_start, tail_end]
else:
if vector_slope(points[1] - points[0]) + vector_slope(
points[3] - points[2]) < vector_slope(points[
2] - points[1]) + vector_slope(points[0] - points[
3]):
if vector_slope(points[1] - points[0]) + vector_slope(points[
3] - points[2]) < vector_slope(points[2] - points[
1]) + vector_slope(points[0] - points[3]):
horizontal_edge_inds = [[0, 1], [2, 3]]
vertical_edge_inds = [[3, 0], [1, 2]]
else:
......@@ -582,7 +585,7 @@ class FCENetTargets:
lv_ignore_polys = [[] for i in range(len(lv_size_divs))]
level_maps = []
for poly in text_polys:
polygon = np.array(poly, dtype=np.int).reshape((1, -1, 2))
polygon = np.array(poly, dtype=np.int32).reshape((1, -1, 2))
_, _, box_w, box_h = cv2.boundingRect(polygon)
proportion = max(box_h, box_w) / (h + 1e-8)
......@@ -591,7 +594,7 @@ class FCENetTargets:
lv_text_polys[ind].append(poly / lv_size_divs[ind])
for ignore_poly in ignore_polys:
polygon = np.array(ignore_poly, dtype=np.int).reshape((1, -1, 2))
polygon = np.array(ignore_poly, dtype=np.int32).reshape((1, -1, 2))
_, _, box_w, box_h = cv2.boundingRect(polygon)
proportion = max(box_h, box_w) / (h + 1e-8)
......
......@@ -64,7 +64,7 @@ class DetLabelEncode(object):
return None
boxes = self.expand_points_num(boxes)
boxes = np.array(boxes, dtype=np.float32)
txt_tags = np.array(txt_tags, dtype=np.bool)
txt_tags = np.array(txt_tags, dtype=np.bool_)
data['polys'] = boxes
data['texts'] = txts
......@@ -218,7 +218,7 @@ class E2ELabelEncodeTest(BaseRecLabelEncode):
else:
txt_tags.append(False)
boxes = np.array(boxes, dtype=np.float32)
txt_tags = np.array(txt_tags, dtype=np.bool)
txt_tags = np.array(txt_tags, dtype=np.bool_)
data['polys'] = boxes
data['ignore_tags'] = txt_tags
temp_texts = []
......@@ -254,7 +254,7 @@ class E2ELabelEncodeTrain(object):
else:
txt_tags.append(False)
boxes = np.array(boxes, dtype=np.float32)
txt_tags = np.array(txt_tags, dtype=np.bool)
txt_tags = np.array(txt_tags, dtype=np.bool_)
data['polys'] = boxes
data['texts'] = txts
......
......@@ -40,7 +40,7 @@ def fill_hole(input_mask):
mask = np.zeros((h + 4, w + 4), np.uint8)
cv2.floodFill(canvas, mask, (0, 0), 1)
canvas = canvas[1:h + 1, 1:w + 1].astype(np.bool)
canvas = canvas[1:h + 1, 1:w + 1].astype(np.bool_)
return ~canvas | input_mask
......
......@@ -68,7 +68,7 @@ def graph_propagation(edges, scores, text_comps, edge_len_thr=50.):
score_dict[edge[0], edge[1]] = scores[i]
nodes = np.sort(np.unique(edges.flatten()))
mapping = -1 * np.ones((np.max(nodes) + 1), dtype=np.int)
mapping = -1 * np.ones((np.max(nodes) + 1), dtype=np.int32)
mapping[nodes] = np.arange(nodes.shape[0])
order_inds = mapping[edges]
vertices = [Node(node) for node in nodes]
......@@ -93,9 +93,8 @@ def connected_components(nodes, score_dict, link_thr):
while node_queue:
node = node_queue.pop(0)
neighbors = set([
neighbor for neighbor in node.links
if score_dict[tuple(sorted([node.ind, neighbor.ind]))] >=
link_thr
neighbor for neighbor in node.links if
score_dict[tuple(sorted([node.ind, neighbor.ind]))] >= link_thr
])
neighbors.difference_update(cluster)
nodes.difference_update(neighbors)
......
......@@ -31,7 +31,7 @@ def fill_hole(input_mask):
mask = np.zeros((h + 4, w + 4), np.uint8)
cv2.floodFill(canvas, mask, (0, 0), 1)
canvas = canvas[1:h + 1, 1:w + 1].astype(np.bool)
canvas = canvas[1:h + 1, 1:w + 1].astype(np.bool_)
return ~canvas | input_mask
......@@ -234,7 +234,7 @@ class FCEPostProcess(object):
poly = np.array(boundary[:-1]).reshape(-1, 2).astype(np.float32)
score = boundary[-1]
points = cv2.boxPoints(cv2.minAreaRect(poly))
points = np.int0(points)
points = np.int64(points)
new_boundaries.append(points.reshape(-1).tolist() + [score])
boundaries = new_boundaries
......
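
The post-processing hunks also replace `np.int0` (an alias for `np.intp`) with `np.int64` when converting `cv2.boxPoints` output. A small usage sketch, assuming opencv-python is installed (the polygon values are made up for illustration):

```python
import cv2
import numpy as np

poly = np.array([[10, 10], [120, 20], [115, 60], [8, 55]], dtype=np.float32)
rect = cv2.minAreaRect(poly)   # ((cx, cy), (w, h), angle)
points = cv2.boxPoints(rect)   # 4 corner points as float32
points = np.int64(points)      # was: points = np.int0(points)
print(points.reshape(-1).tolist())
```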