retrieval.py 6.9 KB
Newer Older
D
dongshuilong 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
D
dongshuilong 已提交
17

D
dongshuilong 已提交
18 19 20 21 22
import platform
import paddle
from ppcls.utils import logger


W
weishengyu 已提交
23 24
def retrieval_eval(engine, epoch_id=0):
    """Evaluate retrieval metrics of ``engine.model`` on query/gallery data.

    Features are extracted with ``cal_feature``. The query features are then
    split into blocks of ``Global.sim_block_size`` rows (default 64) so that
    the query-by-gallery similarity matrix is computed one block at a time.
    Each block's metrics are weighted by ``block_rows / num_queries`` and
    accumulated.

    Args:
        engine: evaluation engine. This function reads ``model``, ``config``,
            ``gallery_query_dataloader`` and ``eval_metric_func`` from it
            (``cal_feature`` reads further attributes).
        epoch_id: epoch index, used only in the log message.

    Returns:
        The value of the first metric in the accumulated metric dict, or
        ``0.`` when ``engine.eval_metric_func`` is ``None``.
    """
    engine.model.eval()

    # step1. build gallery
    if engine.gallery_query_dataloader is not None:
        # A single dataloader serves as both gallery and query set.
        gallery_feas, gallery_img_id, gallery_unique_id = cal_feature(
            engine, name='gallery_query')
        query_feas, query_img_id, query_query_id = (
            gallery_feas, gallery_img_id, gallery_unique_id)
    else:
        gallery_feas, gallery_img_id, gallery_unique_id = cal_feature(
            engine, name='gallery')
        query_feas, query_img_id, query_query_id = cal_feature(
            engine, name='query')

    # step2. do evaluation
    sim_block_size = engine.config["Global"].get("sim_block_size", 64)
    num_query = len(query_feas)
    # Full-size blocks followed by one smaller remainder block, if any.
    sections = [sim_block_size] * (num_query // sim_block_size)
    if num_query % sim_block_size:
        sections.append(num_query % sim_block_size)
    fea_blocks = paddle.split(query_feas, num_or_sections=sections)
    if query_query_id is not None:
        query_id_blocks = paddle.split(
            query_query_id, num_or_sections=sections)
    image_id_blocks = paddle.split(query_img_id, num_or_sections=sections)
    metric_key = None

    # Guard on the metric function, which is what the else-branch calls.
    # (The previous guard tested eval_loss_func, which is unrelated to the
    # metric computation below.)
    if engine.eval_metric_func is None:
        metric_dict = {metric_key: 0.}
    else:
        metric_dict = dict()
        for block_idx, block_fea in enumerate(fea_blocks):
            similarity_matrix = paddle.matmul(
                block_fea, gallery_feas, transpose_y=True)
            if query_query_id is not None:
                query_id_block = query_id_blocks[block_idx]
                query_id_mask = (query_id_block != gallery_unique_id.t())

                image_id_block = image_id_blocks[block_idx]
                image_id_mask = (image_id_block != gallery_img_id.t())

                # Keep a (query, gallery) pair when either id differs; pairs
                # matching on both ids get their similarity zeroed out.
                keep_mask = paddle.logical_or(query_id_mask, image_id_mask)
                similarity_matrix = similarity_matrix * keep_mask.astype(
                    "float32")
            else:
                keep_mask = None

            metric_tmp = engine.eval_metric_func(similarity_matrix,
                                                 image_id_blocks[block_idx],
                                                 gallery_img_id, keep_mask)

            # Weight each block's metric by its share of the query set.
            for key in metric_tmp:
                if key not in metric_dict:
                    metric_dict[key] = metric_tmp[key] * block_fea.shape[
                        0] / num_query
                else:
                    metric_dict[key] += metric_tmp[key] * block_fea.shape[
                        0] / num_query

    metric_info_list = []
    for key in metric_dict:
        # The first metric encountered is the one returned to the caller.
        if metric_key is None:
            metric_key = key
        metric_info_list.append("{}: {:.5f}".format(key, metric_dict[key]))
    metric_msg = ", ".join(metric_info_list)
    logger.info("[Eval][Epoch {}][Avg]{}".format(epoch_id, metric_msg))

    return metric_dict[metric_key]


W
weishengyu 已提交
91
def cal_feature(engine, name='gallery'):
    """Run ``engine.model`` over one dataloader and collect its features.

    Args:
        engine: evaluation engine. This function reads ``model``, ``config``,
            ``use_dali`` and the dataloader selected by ``name``.
        name: which dataloader to use: ``'gallery'``, ``'query'`` or
            ``'gallery_query'``.

    Returns:
        A tuple ``(all_feas, all_img_id, all_unique_id)`` of concatenated
        per-batch results. ``all_unique_id`` is ``None`` when the batches do
        not carry a third element.

    Raises:
        RuntimeError: if ``name`` is not one of the supported values.
    """
    has_unique_id = False
    # Stays None when no batch carries a third (unique id) element, so the
    # return statement below never hits an unbound local.
    all_unique_id = None

    if name == 'gallery':
        dataloader = engine.gallery_dataloader
    elif name == 'query':
        dataloader = engine.query_dataloader
    elif name == 'gallery_query':
        dataloader = engine.gallery_query_dataloader
    else:
        raise RuntimeError("Only support gallery or query dataset")

    global_config = engine.config["Global"]
    feature_normalize = global_config.get("feature_normalize", True)
    feature_binarize = global_config.get("feature_binarize")

    batch_feas_list = []
    img_id_list = []
    unique_id_list = []
    # NOTE(review): the last batch is skipped on Windows; the reason is not
    # documented in this file.
    max_iter = len(dataloader) - 1 if platform.system() == "Windows" else len(
        dataloader)
    for idx, batch in enumerate(dataloader):  # load is very time-consuming
        if idx >= max_iter:
            break
        if idx % global_config["print_batch_step"] == 0:
            logger.info(
                f"{name} feature calculation process: [{idx}/{len(dataloader)}]"
            )
        if engine.use_dali:
            batch = [
                paddle.to_tensor(batch[0]['data']),
                paddle.to_tensor(batch[0]['label'])
            ]
        batch = [paddle.to_tensor(x) for x in batch]
        batch[1] = batch[1].reshape([-1, 1]).astype("int64")
        if len(batch) == 3:
            # A third batch element is treated as the unique id.
            has_unique_id = True
            batch[2] = batch[2].reshape([-1, 1]).astype("int64")
        out = engine.model(batch[0], batch[1])
        if "Student" in out:
            out = out["Student"]
        batch_feas = out["features"]

        # do norm
        if feature_normalize:
            feas_norm = paddle.sqrt(
                paddle.sum(paddle.square(batch_feas), axis=1, keepdim=True))
            batch_feas = paddle.divide(batch_feas, feas_norm)

        # do binarize
        if feature_binarize == "round":
            batch_feas = paddle.round(batch_feas).astype("float32") * 2.0 - 1.0

        if feature_binarize == "sign":
            batch_feas = paddle.sign(batch_feas).astype("float32")

        if paddle.distributed.get_world_size() > 1:
            # Gather this batch from every rank before storing it.
            batch_feas_gather = []
            img_id_gather = []
            unique_id_gather = []
            paddle.distributed.all_gather(batch_feas_gather, batch_feas)
            paddle.distributed.all_gather(img_id_gather, batch[1])
            batch_feas_list.append(paddle.concat(batch_feas_gather))
            img_id_list.append(paddle.concat(img_id_gather))
            if has_unique_id:
                paddle.distributed.all_gather(unique_id_gather, batch[2])
                unique_id_list.append(paddle.concat(unique_id_gather))
        else:
            batch_feas_list.append(batch_feas)
            img_id_list.append(batch[1])
            if has_unique_id:
                unique_id_list.append(batch[2])

    if engine.use_dali:
        dataloader.reset()

    all_feas = paddle.concat(batch_feas_list)
    all_img_id = paddle.concat(img_id_list)
    if has_unique_id:
        all_unique_id = paddle.concat(unique_id_list)

    # just for DistributedBatchSampler issue: repeat sampling
    total_samples = len(
        dataloader.dataset) if not engine.use_dali else dataloader.size
    all_feas = all_feas[:total_samples]
    all_img_id = all_img_id[:total_samples]
    if has_unique_id:
        all_unique_id = all_unique_id[:total_samples]

    logger.info("Build {} done, all feat shape: {}, begin to eval..".format(
        name, all_feas.shape))
    return all_feas, all_img_id, all_unique_id