diff --git a/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_dml.yml b/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_dml.yml index f3ad966488dbc2f6f7ca12033bc4a3d35e1b3bd7..8b160f63538d51dc57b08ba83f7ebf019e3c9dbb 100644 --- a/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_dml.yml +++ b/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_dml.yml @@ -65,7 +65,7 @@ Loss: - ["Student", "Teacher"] maps_name: "thrink_maps" weight: 1.0 - act: "softmax" + # act: None model_name_pairs: ["Student", "Teacher"] key: maps - DistillationDBLoss: diff --git a/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_dml.yml b/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_dml.yml index e1831f26396cffde28a2ca881a0b03312a68f801..e85127a2ee37fc4a6c59066e465318792f9a0dd3 100644 --- a/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_dml.yml +++ b/configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_dml.yml @@ -60,7 +60,7 @@ Loss: - ["Student", "Student2"] maps_name: "thrink_maps" weight: 1.0 - act: "softmax" + # act: None model_name_pairs: ["Student", "Student2"] key: maps - DistillationDBLoss: diff --git a/deploy/lite/crnn_process.cc b/deploy/lite/crnn_process.cc index 6d5fc1504e7b1b3faa35a80662442f60d2e30499..7cd95cddb528e8571d8bab67517ae0140492c536 100644 --- a/deploy/lite/crnn_process.cc +++ b/deploy/lite/crnn_process.cc @@ -34,12 +34,13 @@ cv::Mat CrnnResizeImg(cv::Mat img, float wh_ratio, int rec_image_height) { resize_w = imgW; else resize_w = int(ceilf(imgH * ratio)); - + cv::Mat resize_img; cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f, cv::INTER_LINEAR); cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0, int(imgW - resize_img.cols), cv::BORDER_CONSTANT, {127, 127, 127}); + return resize_img; } std::vector ReadDict(std::string path) { diff --git a/deploy/lite/ocr_db_crnn.cc b/deploy/lite/ocr_db_crnn.cc index cb2bf7791a4307d4e8d2167197d41d903410e0b4..fde0d07d6c1af769610865da1697e6eef0442899 100644 --- a/deploy/lite/ocr_db_crnn.cc +++ b/deploy/lite/ocr_db_crnn.cc @@ -474,7 +474,7 @@ void system(char **argv){ std::vector rec_times; RunRecModel(boxes, srcimg, rec_predictor, rec_text, rec_text_score, - charactor_dict, cls_predictor, use_direction_classify, &rec_times); + charactor_dict, cls_predictor, use_direction_classify, &rec_times, rec_image_height); //// visualization auto img_vis = Visualization(srcimg, boxes); diff --git a/ppocr/losses/basic_loss.py b/ppocr/losses/basic_loss.py index 2df96ea2642d10a50eb892d738f89318dc5e0f4c..74490791c2af0be54dab8ab30ac323790fcac657 100644 --- a/ppocr/losses/basic_loss.py +++ b/ppocr/losses/basic_loss.py @@ -57,17 +57,24 @@ class CELoss(nn.Layer): class KLJSLoss(object): def __init__(self, mode='kl'): assert mode in ['kl', 'js', 'KL', 'JS' - ], "mode can only be one of ['kl', 'js', 'KL', 'JS']" + ], "mode can only be one of ['kl', 'KL', 'js', 'JS']" self.mode = mode def __call__(self, p1, p2, reduction="mean"): - loss = paddle.multiply(p2, paddle.log((p2 + 1e-5) / (p1 + 1e-5) + 1e-5)) - - if self.mode.lower() == "js": + if self.mode.lower() == 'kl': + loss = paddle.multiply(p2, paddle.log((p2 + 1e-5) / (p1 + 1e-5) + 1e-5)) + loss += paddle.multiply( + p1, paddle.log((p1 + 1e-5) / (p2 + 1e-5) + 1e-5)) + loss *= 0.5 + elif self.mode.lower() == "js": + loss = paddle.multiply(p2, paddle.log((2*p2 + 1e-5) / (p1 + p2 + 1e-5) + 1e-5)) loss += paddle.multiply( - p1, paddle.log((p1 + 1e-5) / (p2 + 1e-5) + 1e-5)) + p1, paddle.log((2*p1 + 1e-5) / (p1 + p2 + 1e-5) + 1e-5)) loss *= 0.5 + else: + raise ValueError("The mode.lower() if KLJSLoss should be one of ['kl', 'js']") + if reduction == "mean": loss = paddle.mean(loss, axis=[1, 2]) elif reduction == "none" or reduction is None: @@ -95,7 +102,7 @@ class DMLLoss(nn.Layer): self.act = None self.use_log = use_log - self.jskl_loss = KLJSLoss(mode="js") + self.jskl_loss = KLJSLoss(mode="kl") def _kldiv(self, x, target): eps = 1.0e-10