“b9b75377a2db78651023707ef9b6a342c661eaf4”上不存在“python/paddle/fluid/dygraph/amp/__init__.py”
提交 85bf8683 编写于 作者: C cuicheng01

Support for parsing label files

上级 eade1b72
...@@ -30,5 +30,7 @@ PostProcess: ...@@ -30,5 +30,7 @@ PostProcess:
main_indicator: MultiLabelThreshOutput main_indicator: MultiLabelThreshOutput
MultiLabelThreshOutput: MultiLabelThreshOutput:
threshold: 0.5 threshold: 0.5
class_id_map_file: "../ppcls/utils/NUS-WIDE-SCENE_label_list.txt"
delimiter: " "
SavePreLabel: SavePreLabel:
save_dir: ./pre_label/ save_dir: ./pre_label/
...@@ -82,10 +82,11 @@ class ThreshOutput(object): ...@@ -82,10 +82,11 @@ class ThreshOutput(object):
class Topk(object): class Topk(object):
def __init__(self, topk=1, class_id_map_file=None, delimiter=None):
    """Configure top-k post-processing and load the optional label map.

    Args:
        topk: Number of highest-scoring classes to keep; must be an int.
        class_id_map_file: Path to a text file holding one
            "<class id><delimiter><label name>" entry per line, or None
            to run without label names.
        delimiter: Separator between the class id and the label name in
            the map file. None falls back to a single space.
    """
    assert isinstance(topk, (int, ))
    self.topk = topk
    # parse_class_id_map() splits every line on self.delimiter, so the
    # delimiter must exist before the map is parsed. Assigning it afterwards
    # makes the attribute lookup fail inside the parser's try block, where
    # the error is only printed and the label map is dropped.
    self.delimiter = delimiter if delimiter is not None else " "
    self.class_id_map = self.parse_class_id_map(class_id_map_file)
def parse_class_id_map(self, class_id_map_file): def parse_class_id_map(self, class_id_map_file):
if class_id_map_file is None: if class_id_map_file is None:
...@@ -102,21 +103,20 @@ class Topk(object): ...@@ -102,21 +103,20 @@ class Topk(object):
with open(class_id_map_file, "r") as fin: with open(class_id_map_file, "r") as fin:
lines = fin.readlines() lines = fin.readlines()
for line in lines: for line in lines:
partition = line.split("\n")[0].partition(" ") partition = line.split("\n")[0].partition(self.delimiter)
class_id_map[int(partition[0])] = str(partition[-1]) class_id_map[int(partition[0])] = str(partition[-1])
except Exception as ex: except Exception as ex:
print(ex) print(ex)
class_id_map = None class_id_map = None
return class_id_map return class_id_map
def __call__(self, x, file_names=None, multilabel=False): def __call__(self, x, file_names=None):
if file_names is not None: if file_names is not None:
assert x.shape[0] == len(file_names) assert x.shape[0] == len(file_names)
y = [] y = []
for idx, probs in enumerate(x): for idx, probs in enumerate(x):
index = probs.argsort(axis=0)[-self.topk:][::-1].astype( index = probs.argsort(axis=0)[-self.topk:][::-1].astype(
"int32") if not multilabel else np.where( "int32")
probs >= 0.5)[0].astype("int32")
clas_id_list = [] clas_id_list = []
score_list = [] score_list = []
label_name_list = [] label_name_list = []
...@@ -139,8 +139,32 @@ class Topk(object): ...@@ -139,8 +139,32 @@ class Topk(object):
class MultiLabelThreshOutput(object): class MultiLabelThreshOutput(object):
def __init__(self, threshold=0.5): def __init__(self, threshold=0.5, class_id_map_file=None, delimiter=None):
self.threshold = threshold self.threshold = threshold
self.delimiter = delimiter if delimiter is not None else " "
self.class_id_map = self.parse_class_id_map(class_id_map_file)
def parse_class_id_map(self, class_id_map_file):
    """Load a ``{class_id: label_name}`` mapping from a text file.

    Every non-blank line must look like ``<int id><delimiter><label name>``.
    Only the first occurrence of ``self.delimiter`` splits the line, so the
    label name itself may contain the delimiter.

    Args:
        class_id_map_file: Path to the label file, or None.

    Returns:
        A dict mapping int class ids to str label names, or None when no
        file was given, the path does not exist, or the file could not be
        read or parsed.
    """
    if class_id_map_file is None:
        return None

    if not os.path.exists(class_id_map_file):
        print(
            "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!"
        )
        return None

    try:
        class_id_map = {}
        with open(class_id_map_file, "r") as fin:
            for line in fin:
                entry = line.split("\n")[0]
                if not entry.strip():
                    # A blank line would make int('') raise and throw away
                    # every label that parsed correctly; skip it instead.
                    continue
                class_id, _, label_name = entry.partition(self.delimiter)
                class_id_map[int(class_id)] = str(label_name)
    except Exception as ex:
        # Best effort: a broken label file must not stop inference, so the
        # problem is reported and the caller continues without label names.
        print(ex)
        class_id_map = None
    return class_id_map
def __call__(self, x, file_names=None): def __call__(self, x, file_names=None):
y = [] y = []
...@@ -148,14 +172,17 @@ class MultiLabelThreshOutput(object): ...@@ -148,14 +172,17 @@ class MultiLabelThreshOutput(object):
index = np.where(probs >= self.threshold)[0].astype("int32") index = np.where(probs >= self.threshold)[0].astype("int32")
clas_id_list = [] clas_id_list = []
score_list = [] score_list = []
label_name_list = []
for i in index: for i in index:
clas_id_list.append(i.item()) clas_id_list.append(i.item())
score_list.append(probs[i].item()) score_list.append(probs[i].item())
if self.class_id_map is not None:
label_name_list.append(self.class_id_map[i.item()])
result = { result = {
"class_ids": clas_id_list, "class_ids": clas_id_list,
"scores": np.around( "scores": np.around(
score_list, decimals=5).tolist(), score_list, decimals=5).tolist(),
"label_names": [] "label_names": label_name_list
} }
if file_names is not None: if file_names is not None:
result["file_name"] = file_names[idx] result["file_name"] = file_names[idx]
......
...@@ -51,6 +51,9 @@ python3 -m paddle.distributed.launch \ ...@@ -51,6 +51,9 @@ python3 -m paddle.distributed.launch \
After training 10 epochs, the best correctness of the validation set should be around 0.95. After training 10 epochs, the best correctness of the validation set should be around 0.95.
**Note:**
1. Currently, the loss function for multi-label classification only supports `MultiLabelLoss` (BCE Loss).
2. Currently, the evaluation metrics supported for multi-label classification are `AccuracyScore` and `HammingDistance`; support for other evaluation metrics is planned.
<a name="3"></a> <a name="3"></a>
## 3. Model Evaluation ## 3. Model Evaluation
...@@ -74,7 +77,7 @@ python3 tools/infer.py \ ...@@ -74,7 +77,7 @@ python3 tools/infer.py \
Obtain an output similar to the following: Obtain an output similar to the following:
``` ```
[{'class_ids': [6, 13, 17, 23, 26, 30], 'scores': [0.95683, 0.5567, 0.55211, 0.99088, 0.5943, 0.78767], 'file_name': './deploy/images/0517_2715693311.jpg', 'label_names': []}] [{'class_ids': [6, 13, 17, 23, 30], 'scores': [0.98217, 0.78129, 0.64377, 0.9942, 0.96109], 'label_names': ['clouds', 'lake', 'ocean', 'sky', 'water'], 'file_name': 'deploy/images/0517_2715693311.jpg'}]
``` ```
...@@ -109,9 +112,11 @@ Inference and prediction through predictive engines: ...@@ -109,9 +112,11 @@ Inference and prediction through predictive engines:
python3 python/predict_cls.py \ python3 python/predict_cls.py \
-c configs/inference_cls_multilabel.yaml -c configs/inference_cls_multilabel.yaml
``` ```
The image used for prediction is as follows:
![](../../images/quick_start/multi_label_demo.png)
Obtain an output similar to the following: Obtain an output similar to the following:
``` ```
0517_2715693311.jpg: class id(s): [6, 13, 17, 23, 26, 30], score(s): [0.96, 0.56, 0.55, 0.99, 0.59, 0.79], label_name(s): [] 0517_2715693311.jpg: class id(s): [6, 13, 17, 23, 30], score(s): [0.98, 0.78, 0.64, 0.99, 0.96], label_name(s): ['clouds', 'lake', 'ocean', 'sky', 'water']
``` ```
...@@ -50,6 +50,10 @@ python3 -m paddle.distributed.launch \ ...@@ -50,6 +50,10 @@ python3 -m paddle.distributed.launch \
训练 10 epoch 之后,验证集最好的正确率应该在 0.95 左右。 训练 10 epoch 之后,验证集最好的正确率应该在 0.95 左右。
**注意:**
1. 目前多标签分类的损失函数仅支持`MultiLabelLoss`(BCE Loss)。
2. 目前多标签分类的评估指标支持 `AccuracyScore` 和 `HammingDistance`,其他评估指标敬请期待。
<a name="3"></a> <a name="3"></a>
## 3. 模型评估 ## 3. 模型评估
...@@ -71,7 +75,7 @@ python3 tools/infer.py \ ...@@ -71,7 +75,7 @@ python3 tools/infer.py \
得到类似下面的输出: 得到类似下面的输出:
``` ```
[{'class_ids': [6, 13, 17, 23, 26, 30], 'scores': [0.95683, 0.5567, 0.55211, 0.99088, 0.5943, 0.78767], 'file_name': './deploy/images/0517_2715693311.jpg', 'label_names': []}] [{'class_ids': [6, 13, 17, 23, 30], 'scores': [0.98217, 0.78129, 0.64377, 0.9942, 0.96109], 'label_names': ['clouds', 'lake', 'ocean', 'sky', 'water'], 'file_name': 'deploy/images/0517_2715693311.jpg'}]
``` ```
<a name="5"></a> <a name="5"></a>
...@@ -102,8 +106,10 @@ cd ./deploy ...@@ -102,8 +106,10 @@ cd ./deploy
python3 python/predict_cls.py \ python3 python/predict_cls.py \
-c configs/inference_cls_multilabel.yaml -c configs/inference_cls_multilabel.yaml
``` ```
推理图片如下:
![](../../images/quick_start/multi_label_demo.png)
得到类似下面的输出: 得到类似下面的输出:
``` ```
0517_2715693311.jpg: class id(s): [6, 13, 17, 23, 26, 30], score(s): [0.96, 0.56, 0.55, 0.99, 0.59, 0.79], label_name(s): [] 0517_2715693311.jpg: class id(s): [6, 13, 17, 23, 30], score(s): [0.98, 0.78, 0.64, 0.99, 0.96], label_name(s): ['clouds', 'lake', 'ocean', 'sky', 'water']
``` ```
...@@ -118,6 +118,8 @@ Infer: ...@@ -118,6 +118,8 @@ Infer:
PostProcess: PostProcess:
name: MultiLabelThreshOutput name: MultiLabelThreshOutput
threshold: 0.5 threshold: 0.5
class_id_map_file: "ppcls/utils/NUS-WIDE-SCENE_label_list.txt"
delimiter: " "
Metric: Metric:
Train: Train:
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import os
import numpy as np import numpy as np
import paddle.nn.functional as F import paddle.nn.functional as F
...@@ -38,8 +39,31 @@ class ThreshOutput(object): ...@@ -38,8 +39,31 @@ class ThreshOutput(object):
class MultiLabelThreshOutput(object): class MultiLabelThreshOutput(object):
def __init__(self, threshold=0.5): def __init__(self, threshold=0.5, class_id_map_file=None, delimiter=None):
self.threshold = threshold self.threshold = threshold
self.delimiter = delimiter if delimiter is not None else " "
self.class_id_map = self.parse_class_id_map(class_id_map_file)
def parse_class_id_map(self, class_id_map_file):
    """Read the label file and return a ``{class_id: label_name}`` dict.

    Each non-blank line is expected to be
    ``<int id><delimiter><label name>``. The line is split at the first
    occurrence of ``self.delimiter`` only, so label names may contain the
    delimiter.

    Args:
        class_id_map_file: Path to the label file, or None.

    Returns:
        A dict mapping int class ids to str label names, or None when no
        file was given, the path does not exist, or reading or parsing the
        file failed.
    """
    if class_id_map_file is None:
        return None

    if not os.path.exists(class_id_map_file):
        print(
            "Warning: If want to use your own label_dict, please input legal path!\nOtherwise label_names will be empty!"
        )
        return None

    try:
        class_id_map = {}
        with open(class_id_map_file, "r") as fin:
            for line in fin:
                entry = line.split("\n")[0]
                if not entry.strip():
                    # Skip blank lines: int('') would raise and the handler
                    # below would discard the whole mapping.
                    continue
                class_id, _, label_name = entry.partition(self.delimiter)
                class_id_map[int(class_id)] = str(label_name)
    except Exception as ex:
        # Best effort: report the problem and continue without label names
        # rather than aborting post-processing.
        print(ex)
        class_id_map = None
    return class_id_map
def __call__(self, x, file_names=None): def __call__(self, x, file_names=None):
y = [] y = []
...@@ -48,13 +72,17 @@ class MultiLabelThreshOutput(object): ...@@ -48,13 +72,17 @@ class MultiLabelThreshOutput(object):
index = np.where(probs >= self.threshold)[0].astype("int32") index = np.where(probs >= self.threshold)[0].astype("int32")
clas_id_list = [] clas_id_list = []
score_list = [] score_list = []
label_name_list = []
for i in index: for i in index:
clas_id_list.append(i.item()) clas_id_list.append(i.item())
score_list.append(probs[i].item()) score_list.append(probs[i].item())
if self.class_id_map is not None:
label_name_list.append(self.class_id_map[i.item()])
result = { result = {
"class_ids": clas_id_list, "class_ids": clas_id_list,
"scores": np.around( "scores": np.around(
score_list, decimals=5).tolist(), score_list, decimals=5).tolist(),
"label_names": label_name_list
} }
if file_names is not None: if file_names is not None:
result["file_name"] = file_names[idx] result["file_name"] = file_names[idx]
......
...@@ -46,19 +46,18 @@ class Topk(object): ...@@ -46,19 +46,18 @@ class Topk(object):
class_id_map = None class_id_map = None
return class_id_map return class_id_map
def __call__(self, x, file_names=None, multilabel=False): def __call__(self, x, file_names=None):
if isinstance(x, dict): if isinstance(x, dict):
x = x['logits'] x = x['logits']
assert isinstance(x, paddle.Tensor) assert isinstance(x, paddle.Tensor)
if file_names is not None: if file_names is not None:
assert x.shape[0] == len(file_names) assert x.shape[0] == len(file_names)
x = F.softmax(x, axis=-1) if not multilabel else F.sigmoid(x) x = F.softmax(x, axis=-1)
x = x.numpy() x = x.numpy()
y = [] y = []
for idx, probs in enumerate(x): for idx, probs in enumerate(x):
index = probs.argsort(axis=0)[-self.topk:][::-1].astype( index = probs.argsort(axis=0)[-self.topk:][::-1].astype(
"int32") if not multilabel else np.where( "int32")
probs >= 0.5)[0].astype("int32")
clas_id_list = [] clas_id_list = []
score_list = [] score_list = []
label_name_list = [] label_name_list = []
......
0 airport
1 beach
2 bridge
3 buildings
4 castle
5 cityscape
6 clouds
7 frost
8 garden
9 glacier
10 grass
11 harbor
12 house
13 lake
14 moon
15 mountain
16 nighttime
17 ocean
18 plants
19 railroad
20 rainbow
21 reflection
22 road
23 sky
24 snow
25 street
26 sunset
27 temple
28 town
29 valley
30 water
31 waterfall
32 window
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册