diff --git a/README.md b/README.md index ee12d8c5c4926231f8d477376ec9b5d4d791e452..2ed73889bece66823a019459f2ab87c10d8fa647 100644 --- a/README.md +++ b/README.md @@ -16,8 +16,10 @@ * 通过运行read_datasests.py,可以对数据的标注信息进行解析可视化。 ### 2、静态手势识别数据集 -* 数据集来源大部分样本是网上数据自制。 -* 少部分来源于 kinect_leap_dataset 数据集样本加工制作,其官网地址为:https://lttm.dei.unipd.it/downloads/gesture/ +* 数据集来源3部分,且网上数据占绝大多数,具体: +* 1)来源于网上数据并自制。 +* 2)来源于自己相机采集并自制。 +* 3)来源于 kinect_leap_dataset 数据集并自制,其官网地址为:https://lttm.dei.unipd.it/downloads/gesture/ * 注:数据集如有侵权,请联系删除。 ``` kinect_leap_dataset 数据集来源于以下paper项目组的贡献。 @@ -28,20 +30,23 @@ kinect_leap_dataset 数据集来源于以下paper项目组的贡献。 * 数据下载 该项目数据集命名为:handpose_x_gesture_v1,[数据集下载地址(百度网盘 Password: )]() - + 数据集共 2850 个样本,分为 14 类。 * 手势定义 -| 定义 | 示例 | -|--|--| -| 000-one | ![one](https://codechina.csdn.net/EricLee/classification/-/raw/master/samples/one.jpg) | -| 第二章 | ~ | -| 第三章 | ~ | -| 第四章 | ~ | -| 第五章 | ~ | -| 第六章 | ~ | -| 第七章 | ~ | -| 第八章 | ~ | -| 第九章 | ~ | -| 第十章 | ~ | + + ![one](https://codechina.csdn.net/EricLee/classification/-/raw/master/samples/one.jpg) :000-one + ![five](https://codechina.csdn.net/EricLee/classification/-/raw/master/samples/five.jpg) :001-five + ![fist](https://codechina.csdn.net/EricLee/classification/-/raw/master/samples/fist.jpg) :002-fist + ![ok](https://codechina.csdn.net/EricLee/classification/-/raw/master/samples/ok.jpg) :003-ok + ![heartSingle](https://codechina.csdn.net/EricLee/classification/-/raw/master/samples/heartSingle.jpg) :004-heartSingle + ![yearh](https://codechina.csdn.net/EricLee/classification/-/raw/master/samples/yearh.jpg) :005-yearh + ![three](https://codechina.csdn.net/EricLee/classification/-/raw/master/samples/three.jpg) :006-three + ![four](https://codechina.csdn.net/EricLee/classification/-/raw/master/samples/four.jpg) :007-four + ![six](https://codechina.csdn.net/EricLee/classification/-/raw/master/samples/six.jpg) :008-six + ![Iloveyou](https://codechina.csdn.net/EricLee/classification/-/raw/master/samples/Iloveyou.jpg) 
:009-Iloveyou + ![gun](https://codechina.csdn.net/EricLee/classification/-/raw/master/samples/gun.jpg) :010-gun + ![thumbUp](https://codechina.csdn.net/EricLee/classification/-/raw/master/samples/thumbUp.jpg) :011-thumbUp + ![nine](https://codechina.csdn.net/EricLee/classification/-/raw/master/samples/nine.jpg) :012-nine + ![pink](https://codechina.csdn.net/EricLee/classification/-/raw/master/samples/pink.jpg) :013-pink ## 预训练模型 * [预训练模型下载地址(百度网盘 Password: ks87 )](https://pan.baidu.com/s/1tT0wF4N2I9p5JDfCwtM1CQ) diff --git a/inference.py b/inference.py index 8bb2f82f1e39961a2f4b453b0d15d44b719837f8..681c1bf20713de778e22277a5a61445ded6dcaff 100644 --- a/inference.py +++ b/inference.py @@ -40,21 +40,23 @@ def get_xml_msg(path): if __name__ == "__main__": - parser = argparse.ArgumentParser(description=' Project Classification Test') - parser.add_argument('--test_model', type=str, default = './model_exp/2021-02-09_06-32-32/resnet50_epoch-627.pth', + parser = argparse.ArgumentParser(description=' Project Classification top1 Test') + parser.add_argument('--test_model', type=str, default = './model_exp/2021-03-08_01-16-46/resnet_34-192_epoch-176.pth', help = 'test_model') # 模型路径 - parser.add_argument('--model', type=str, default = 'resnet_50', + parser.add_argument('--model', type=str, default = 'resnet_34', help = 'model : resnet_18,resnet_34,resnet_50,resnet_101,resnet_152') # 模型类型 - parser.add_argument('--num_classes', type=int , default = 120, + parser.add_argument('--num_classes', type=int , default = 14, help = 'num_classes') # 分类类别个数 parser.add_argument('--GPUS', type=str, default = '0', help = 'GPUS') # GPU选择 - parser.add_argument('--test_path', type=str, default = './datasets/test/', + parser.add_argument('--test_path', type=str, default = './handpose_x_gesture_v1/', help = 'test_path') # 测试集路径 - parser.add_argument('--img_size', type=tuple , default = (256,256), + parser.add_argument('--img_size', type=tuple , default = (192,192), help = 'img_size') # 输入模型图片尺寸 
parser.add_argument('--fix_res', type=bool , default = False, help = 'fix_resolution') # 输入模型样本图片是否保证图像分辨率的长宽比 + parser.add_argument('--have_label_file', type=bool , default = False, + help = 'have_label_file') # whether each image has a matching .xml annotation file (used to crop the sample to its bbox) parser.add_argument('--vis', type=bool , default = True, help = 'vis') # 是否可视化图片 @@ -125,20 +127,20 @@ if __name__ == "__main__": img = cv2.imread(ops.test_path +doc+'/'+ file) #--------------- + if ops.have_label_file: + xml_ = ops.test_path +doc+'/'+ file.replace(".jpg",".xml") - xml_ = ops.test_path +doc+'/'+ file.replace(".jpg",".xml") + list_x = get_xml_msg(xml_)# 获取 xml 文件 的 object - list_x = get_xml_msg(xml_)# 获取 xml 文件 的 object - - # 绘制 bbox - for j in range(min(1,len(list_x))): - label_,bbox_ = list_x[j] - x1,y1,x2,y2 = bbox_ - x1 = int(np.clip(x1,0,img.shape[1]-1)) - y1 = int(np.clip(y1,0,img.shape[0]-1)) - x2 = int(np.clip(x2,0,img.shape[1]-1)) - y2 = int(np.clip(y2,0,img.shape[0]-1)) - img = img[y1:y2,x1:x2,:] + # 绘制 bbox + for j in range(min(1,len(list_x))): + label_,bbox_ = list_x[j] + x1,y1,x2,y2 = bbox_ + x1 = int(np.clip(x1,0,img.shape[1]-1)) + y1 = int(np.clip(y1,0,img.shape[0]-1)) + x2 = int(np.clip(x2,0,img.shape[1]-1)) + y2 = int(np.clip(y2,0,img.shape[0]-1)) + img = img[y1:y2,x1:x2,:] # 输入图片预处理 if ops.fix_res: @@ -171,7 +173,7 @@ if __name__ == "__main__": score_ = output[max_index] - print('gt {} -- pre {} --->>> confidence {}'.format(gt_label,max_index,score_)) + print('gt {} - {} -- pre {} --->>> confidence {}'.format(doc,gt_label,max_index,score_)) dict_p[dict_static[max_index]] += 1 if gt_label == max_index: dict_r[doc] += 1 diff --git a/samples/Iloveyou.jpg b/samples/Iloveyou.jpg new file mode 100644 index 0000000000000000000000000000000000000000..72088a31ccf3e85e5124d49f77cd92922cf2679c Binary files 
/dev/null and b/samples/fist.jpg differ diff --git a/samples/five.jpg b/samples/five.jpg new file mode 100644 index 0000000000000000000000000000000000000000..01c07504a71ec7ac556ed56a64a8e76ff4e321bf Binary files /dev/null and b/samples/five.jpg differ diff --git a/samples/four.jpg b/samples/four.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d99de899c849ec747a61db7b58ea98bf55757d69 Binary files /dev/null and b/samples/four.jpg differ diff --git a/samples/gun.jpg b/samples/gun.jpg new file mode 100644 index 0000000000000000000000000000000000000000..bafca8664d7aec71c508fb0b94ba8065d83eea14 Binary files /dev/null and b/samples/gun.jpg differ diff --git a/samples/heartSingle.jpg b/samples/heartSingle.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1866949798a150682452e6d231842a3c11eb961a Binary files /dev/null and b/samples/heartSingle.jpg differ diff --git a/samples/nine.jpg b/samples/nine.jpg new file mode 100644 index 0000000000000000000000000000000000000000..23cdf22c1b1fe3c5778b0840204c4b19ac4ee4f2 Binary files /dev/null and b/samples/nine.jpg differ diff --git a/samples/ok.jpg b/samples/ok.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2e48fa836798115e0f4c768af5ec4502063f72df Binary files /dev/null and b/samples/ok.jpg differ diff --git a/samples/pink.jpg b/samples/pink.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9f3dedc8e82ba0e76f4c8b33758c0a0a67c41cef Binary files /dev/null and b/samples/pink.jpg differ diff --git a/samples/six.jpg b/samples/six.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7f5a4b47c8b2c9e31be5423d805e99ad257ffc35 Binary files /dev/null and b/samples/six.jpg differ diff --git a/samples/three.jpg b/samples/three.jpg new file mode 100644 index 0000000000000000000000000000000000000000..68baf858175cbd02d818369349134066286c72be Binary files /dev/null and b/samples/three.jpg differ diff --git a/samples/thumbUp.jpg 
b/samples/thumbUp.jpg new file mode 100644 index 0000000000000000000000000000000000000000..340c2f95c68f8bcec66ee58c9766693347149c6a Binary files /dev/null and b/samples/thumbUp.jpg differ diff --git a/samples/yearh.jpg b/samples/yearh.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d315490452d22280c31cd2f202abeede70f5e1df Binary files /dev/null and b/samples/yearh.jpg differ diff --git a/train.py b/train.py index 93f4af711554cb68ba12b35ce237725b68cbb3dd..4bb38748c1c5f0881ab6f5e243703095250e71b5 100644 --- a/train.py +++ b/train.py @@ -139,7 +139,7 @@ def trainer(ops,f_log): # print(model_)# 打印模型结构 # Dataset - dataset = LoadImagesAndLabels(path = ops.train_path,img_size=ops.img_size,flag_agu=ops.flag_agu,fix_res = ops.fix_res,val_split = val_split) + dataset = LoadImagesAndLabels(path = ops.train_path,img_size=ops.img_size,flag_agu=ops.flag_agu,fix_res = ops.fix_res,val_split = val_split,have_label_file = ops.have_label_file) print('len train datasets : %s'%(dataset.__len__())) # Dataloader dataloader = DataLoader(dataset, @@ -228,8 +228,9 @@ def trainer(ops,f_log): # 一个 epoch 保存连词最新的 模型 if i%(int(dataset.__len__()/ops.batch_size/2-1)) == 0 and i > 0: torch.save(model_.state_dict(), ops.model_exp + 'latest.pth') - # 每一个 epoch 进行模型保存 - torch.save(model_.state_dict(), ops.model_exp + 'model_epoch-{}.pth'.format(epoch)) + # 每间隔 5 个 epoch 进行模型保存 + if (epoch%5) == 0 and (epoch > 9): + torch.save(model_.state_dict(), ops.model_exp + '{}-{}_epoch-{}.pth'.format(ops.model,ops.img_size[0],epoch)) if len(val_split) > 0 and (epoch%ops.test_interval==0): # test @@ -259,14 +260,23 @@ if __name__ == "__main__": help = 'seed') # 设置随机种子 parser.add_argument('--model_exp', type=str, default = './model_exp', help = 'model_exp') # 模型输出文件夹 - parser.add_argument('--model', type=str, default = 'resnet_50', + parser.add_argument('--model', type=str, default = 'resnet_34', help = 'model : resnet_18,resnet_34,resnet_50,resnet_101,resnet_152') # 模型类型 - 
parser.add_argument('--num_classes', type=int , default = 120, - help = 'num_classes') # 分类类别个数 + + ''' + 注意以下3个参数与具体分类任务数据集,息息相关 + ''' + #--------------------------------------------------------------------------------- + parser.add_argument('--train_path', type=str, default = './handpose_x_gesture_v1/', + help = 'train_path') # 训练集路径 + parser.add_argument('--num_classes', type=int , default = 14, + help = 'num_classes') # 分类类别个数,gesture 配置为 14 , Stanford Dogs 配置为 120 + parser.add_argument('--have_label_file', type=bool, default = False, + help = 'have_label_file') # 是否有配套的标注文件解析才能生成分类样本,gesture 配置为 False , Stanford Dogs 配置为 True + #--------------------------------------------------------------------------------- + parser.add_argument('--GPUS', type=str, default = '0', help = 'GPUS') # GPU选择 - parser.add_argument('--train_path', type=str, default = './datasets/train/', - help = 'train_path') # 训练集路径 parser.add_argument('--val_factor', type=float, default = 0.0, help = 'val_factor') # 从训练集中分离验证集对应的比例 parser.add_argument('--test_interval', type=int, default = 1, @@ -291,7 +301,7 @@ if __name__ == "__main__": help = 'epochs') # 训练周期 parser.add_argument('--num_workers', type=int, default = 1, help = 'num_workers') # 训练数据生成器线程数 - parser.add_argument('--img_size', type=tuple , default = (256,256), + parser.add_argument('--img_size', type=tuple , default = (192,192), help = 'img_size') # 输入模型图片尺寸 parser.add_argument('--flag_agu', type=bool , default = True, help = 'data_augmentation') # 训练数据生成器是否进行数据扩增 @@ -302,6 +312,7 @@ if __name__ == "__main__": parser.add_argument('--log_flag', type=bool, default = False, help = 'log flag') # 是否保存训练 log + #-------------------------------------------------------------------------- args = parser.parse_args()# 解析添加参数 #--------------------------------------------------------------------------