diff --git a/demo/convert_for_predict.py b/demo/convert_for_predict.py
new file mode 100644
index 0000000000000000000000000000000000000000..648375cab2d8b87640eada3773b2fcc46fa7dd2f
--- /dev/null
+++ b/demo/convert_for_predict.py
@@ -0,0 +1,15 @@
+from __future__ import print_function
+
+from plsc import Entry
+
+
+def main():
+    # Export the checkpoint saved at epoch 5 as an inference model.
+    ins = Entry()
+    ins.set_checkpoint_dir('./saved_model/5')
+    ins.set_model_save_dir('./output_infer')
+    ins.convert_for_prediction()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/demo/do_test.py b/demo/do_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..02e18d703e9ec354a91346d04ca3b945be52c8a3
--- /dev/null
+++ b/demo/do_test.py
@@ -0,0 +1,24 @@
+from __future__ import print_function
+import argparse
+
+from plsc import Entry
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--checkpoint_dir",
+                    type=str,
+                    default=None,
+                    help="Directory for checkpoints.")
+args = parser.parse_args()
+
+
+def main():
+    # Evaluate the given checkpoint on the validation data under ./data.
+    ins = Entry()
+    ins.set_checkpoint_dir(args.checkpoint_dir)
+    ins.set_loss_type('arcface')
+    ins.set_dataset_dir('./data')
+    ins.test()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/demo/do_test.sh b/demo/do_test.sh
new file mode 100644
index 0000000000000000000000000000000000000000..a6e3bb08ce46a59feae551c1a842cf02435ba0fa
--- /dev/null
+++ b/demo/do_test.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+
+python do_test.py \
+    --checkpoint_dir="./saved_model/8"
diff --git a/demo/do_train.py b/demo/do_train.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a7a69b24449c52171ace0e7e6d29f41560f6619
--- /dev/null
+++ b/demo/do_train.py
@@ -0,0 +1,67 @@
+from __future__ import print_function
+import argparse
+
+from plsc import Entry
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--model_save_dir",
+                    type=str,
+                    default=None,
+                    help="Directory to save models.")
+parser.add_argument("--checkpoint_dir",
+                    type=str,
+                    default=None,
+                    help="Directory for checkpoints.")
+parser.add_argument("--data_dir",
+                    type=str,
+                    default="./data",
+                    help="Directory for datasets.")
+parser.add_argument("--num_epochs",
+                    type=int,
+                    default=2,
+                    help="Number of epochs to run.")
+parser.add_argument("--loss_type",
+                    type=str,
+                    default='arcface',
+                    help="Loss type to use.")
+parser.add_argument("--fs_name",
+                    type=str,
+                    default=None,
+                    help="fs_name for HDFS.")
+parser.add_argument("--fs_ugi",
+                    type=str,
+                    default=None,
+                    help="fs_ugi for HDFS.")
+parser.add_argument("--fs_dir_load",
+                    type=str,
+                    default=None,
+                    help="Remote HDFS directory to load checkpoints from.")
+parser.add_argument("--fs_dir_save",
+                    type=str,
+                    default=None,
+                    help="Remote HDFS directory to save models to.")
+args = parser.parse_args()
+
+
+def main():
+    ins = Entry()
+    ins.set_model_save_dir(args.model_save_dir)
+    ins.set_dataset_dir(args.data_dir)
+    ins.set_train_epochs(args.num_epochs)
+    ins.set_checkpoint_dir(args.checkpoint_dir)
+    ins.set_loss_type(args.loss_type)
+    ins.set_calc_acc(True)
+    # Uncomment to enable mixed precision training:
+    # ins.set_mixed_precision(True)
+    if args.fs_name:
+        # Sync checkpoints and models with HDFS when fs_name is given.
+        ins.set_hdfs_info(args.fs_name,
+                          args.fs_ugi,
+                          args.fs_dir_save,
+                          args.fs_dir_load)
+    ins.train()
+
+
+if __name__ == "__main__":
+    main()
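Note: a minimal sketch of consuming the inference model that convert_for_predict.py exports, assuming the demo's 112x112 RGB input and the ./output_infer directory used above; the dummy batch and fluid 1.x API usage here are illustrative, not part of this patch:

    import numpy as np
    import paddle.fluid as fluid

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    # load_inference_model returns the program plus feed/fetch metadata.
    program, feed_names, fetch_targets = fluid.io.load_inference_model(
        dirname='./output_infer', executor=exe)
    image = np.random.rand(1, 3, 112, 112).astype('float32')  # dummy batch
    features = exe.run(program,
                       feed={feed_names[0]: image},
                       fetch_list=fetch_targets)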
diff --git a/demo/process_base64_files.py b/demo/process_base64_files.py
new file mode 100644
index 0000000000000000000000000000000000000000..783afca57ee4ba954e4398e62820702e7b0a3339
--- /dev/null
+++ b/demo/process_base64_files.py
@@ -0,0 +1,199 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import logging
+import math
+import os
+import random
+import sqlite3
+import tempfile
+import time
+
+import six
+
+logging.basicConfig(level=logging.INFO,
+                    format='[%(asctime)s - %(levelname)s] %(message)s',
+                    datefmt='%d %b %Y %H:%M:%S')
+logger = logging.getLogger()
+
+parser = argparse.ArgumentParser(description="""
+    Tool to preprocess dataset in base64 format.""")
+
+"""
+We assume that the dataset directory contains a file-list file and one
+or more data files. Each line of the file-list file names a data file,
+and each line of a data file represents an image in base64 format.
+
+For example:
+
+dir
+ |-- file_list.txt
+ |-- part_one.txt
+ `-- part_two.txt
+
+In the above example, the file file_list.txt has two lines:
+
+    part_one.txt
+    part_two.txt
+
+Each line in part_one.txt and part_two.txt represents an image in base64
+format.
+"""
+
+parser.add_argument("--data_dir",
+                    type=str,
+                    required=True,
+                    default=None,
+                    help="Directory for datasets.")
+parser.add_argument("--file_list",
+                    type=str,
+                    required=True,
+                    default=None,
+                    help="The file that lists the data files.")
+parser.add_argument("--nranks",
+                    type=int,
+                    required=True,
+                    default=1,
+                    help="Number of ranks.")
+args = parser.parse_args()
+
+
+class Base64Preprocessor(object):
+    def __init__(self, data_dir, file_list, nranks):
+        super(Base64Preprocessor, self).__init__()
+        self.data_dir = data_dir
+        self.file_list = file_list
+        self.nranks = nranks
+
+        self.tempfile = tempfile.NamedTemporaryFile(delete=False, dir=data_dir)
+        self.sqlite3_file = self.tempfile.name
+        self.conn = None
+        self.cursor = None
+
+    def insert_to_db(self, cnt, line):
+        label = int(line[0])
+        data = line[1]
+        # Parameterized statement keeps the base64 payload intact.
+        sql_cmd = "INSERT INTO DATASET (ID, DATA, LABEL) VALUES (?, ?, ?);"
+        self.cursor.execute(sql_cmd, (cnt, data, label))
+
+    def create_db(self):
+        start = time.time()
+        print(self.sqlite3_file)
+        self.conn = sqlite3.connect(self.sqlite3_file)
+        self.cursor = self.conn.cursor()
+        self.cursor.execute('''CREATE TABLE DATASET
+                               (ID INT PRIMARY KEY NOT NULL,
+                                DATA TEXT NOT NULL,
+                                LABEL INT NOT NULL);''')
+
+        file_list_path = os.path.join(self.data_dir, self.file_list)
+        with open(file_list_path, 'r') as f:
+            cnt = 0
+            if six.PY2:
+                for line in f.xreadlines():
+                    line = line.strip()
+                    file_path = os.path.join(self.data_dir, line)
+                    with open(file_path, 'r') as df:
+                        for line_local in df.xreadlines():
+                            line_local = line_local.strip().split('\t')
+                            self.insert_to_db(cnt, line_local)
+                            cnt += 1
+                    os.remove(file_path)
+            else:
+                for line in f:
+                    line = line.strip()
+                    file_path = os.path.join(self.data_dir, line)
+                    with open(file_path, 'r') as df:
+                        for line_local in df:
+                            line_local = line_local.strip().split('\t')
+                            self.insert_to_db(cnt, line_local)
+                            cnt += 1
+                    os.remove(file_path)
+
+        self.conn.commit()
+        diff = time.time() - start
+        print("time: ", diff)
+        return cnt
+
+    def shuffle_files(self):
+        num = self.create_db()
+        nranks = self.nranks
+        index = list(range(num))
+
+        seed = int(time.time())
+        random.seed(seed)
+        random.shuffle(index)
+
+        start_time = time.time()
+
+        # Pad the shuffled index so that every rank gets the same number
+        # of lines, duplicating a few leading samples when needed.
+        lines_per_rank = int(math.ceil(num / nranks))
+        total_num = lines_per_rank * nranks
+        index = index + index[0:total_num - num]
+        assert len(index) == total_num
+
+        for rank in range(nranks):
+            start = rank * lines_per_rank
+            end = (rank + 1) * lines_per_rank  # exclusive
+            f_handler = open(os.path.join(self.data_dir,
+                                          ".tmp_" + str(rank)), 'w')
+            for i in range(start, end):
+                idx = index[i]
+                sql_cmd = "SELECT DATA, LABEL FROM DATASET WHERE ID={};".format(
+                    idx)
+                cursor = self.cursor.execute(sql_cmd)
+                for result in cursor:
+                    data = result[0]
+                    label = result[1]
+                    # Output lines use the '<data>\t<label>' order.
+                    line = data + '\t' + str(label) + '\n'
+                    f_handler.write(line)
+            f_handler.close()
+
+        data_dir = self.data_dir
+        file_list = self.file_list
+        file_list = os.path.join(data_dir, file_list)
+        temp_file_list = file_list + "_temp"
+        with open(temp_file_list, 'w') as f_t:
+            for rank in range(nranks):
+                line = "base64_rank_{}".format(rank)
+                line += '\n'
+                f_t.write(line)
+                os.rename(os.path.join(data_dir, ".tmp_" + str(rank)),
+                          os.path.join(data_dir, "base64_rank_{}".format(rank)))
+
+        os.remove(file_list)
+        os.rename(temp_file_list, file_list)
+        print("shuffle time: ", time.time() - start_time)
+
+    def close_db(self):
+        self.conn.close()
+        self.tempfile.close()
+        os.remove(self.sqlite3_file)
+
+
+def main():
+    obj = Base64Preprocessor(args.data_dir, args.file_list, args.nranks)
+    obj.shuffle_files()
+    obj.close_db()
+
+
+if __name__ == "__main__":
+    main()
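Note: a toy dataset matching the layout described in the docstring of process_base64_files.py can be generated as below; the file names and fake payload are hypothetical. Each part-file line is '<label>\t<base64 data>'; the tool rewrites shards as '<data>\t<label>' lines named base64_rank_N:

    import base64
    import os

    data_dir = './data'  # placeholder location
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    # One part file with four fake samples, one '<label>\t<data>' per line.
    with open(os.path.join(data_dir, 'part_one.txt'), 'w') as f:
        for label in range(4):
            payload = base64.b64encode(b'fake-image-bytes').decode('ascii')
            f.write('{}\t{}\n'.format(label, payload))
    # The file-list file names every part file.
    with open(os.path.join(data_dir, 'file_list.txt'), 'w') as f:
        f.write('part_one.txt\n')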
diff --git a/demo/test_base64.sh b/demo/test_base64.sh
new file mode 100644
index 0000000000000000000000000000000000000000..4d8817d194cf6dc94e95e83e38266ef929489058
--- /dev/null
+++ b/demo/test_base64.sh
@@ -0,0 +1 @@
+python
diff --git a/demo/test_convert_for_predict.sh b/demo/test_convert_for_predict.sh
new file mode 100644
index 0000000000000000000000000000000000000000..9cdf9aaac0d81a01d44888cf20600fb6f5cfb213
--- /dev/null
+++ b/demo/test_convert_for_predict.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+
+python3 convert_for_predict.py
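Note: test_base64.sh above is truncated to a bare `python` in this patch and is left as-is; a plausible invocation of process_base64_files.py, with placeholder paths, would be:

    python process_base64_files.py \
        --data_dir="./data" \
        --file_list="file_list.txt" \
        --nranks=2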
diff --git a/demo/test_train.sh b/demo/test_train.sh
new file mode 100644
index 0000000000000000000000000000000000000000..cc0db5bb1beefc34708b4e3654dbe818f4d5ca4c
--- /dev/null
+++ b/demo/test_train.sh
@@ -0,0 +1,112 @@
+#!/usr/bin/env bash
+
+export PATH=/home/lilong/sandyhouse/PLSC/python/bin:$PATH
+export FLAGS_eager_delete_tensor_gb=0.0
+export GLOG_v=0
+
+## case 2: run with softmax
+#python do_train.py \
+#    --model_save_dir="./saved_model" \
+#    --data_dir="./data" \
+#    --num_epochs=2 \
+#    --loss_type='softmax'
+#
+# case 3: run with dist_arcface
+python -m paddle.distributed.launch \
+    --log_dir='mylog' \
+    --selected_gpus="0,1,2,3,4,5,6,7" \
+    --started_port="12349" \
+    do_train.py \
+    --data_dir="./data" \
+    --num_epochs=10000000 \
+    --loss_type='dist_arcface'
+#
+## case 4: run with dist_softmax
+#python -m paddle.distributed.launch \
+#    --log_dir='mylog' \
+#    --selected_gpus="0,1" \
+#    --started_port="12345" \
+#    do_train.py \
+#    --model_save_dir="./saved_model" \
+#    --data_dir="./data" \
+#    --num_epochs=2 \
+#    --loss_type='dist_softmax'
+
+## case 5: resume from checkpoints with the same number of trainers
+#python -m paddle.distributed.launch \
+#    --log_dir='mylog' \
+#    --selected_gpus="0,1" \
+#    --started_port="12345" \
+#    do_train.py \
+#    --model_save_dir="./saved_model" \
+#    --checkpoint_dir="./saved_model/1" \
+#    --data_dir="./data" \
+#    --num_epochs=2 \
+#    --loss_type='dist_softmax'
+
+## case 6: resume from checkpoints with an increased number of trainers
+#python -m paddle.distributed.launch \
+#    --log_dir='mylog' \
+#    --selected_gpus="0,1,2,3" \
+#    --started_port="12345" \
+#    do_train.py \
+#    --model_save_dir="./saved_model" \
+#    --checkpoint_dir="./saved_model/0" \
+#    --data_dir="./data" \
+#    --num_epochs=2 \
+#    --loss_type='dist_softmax'
+#
+## case 7: resume from checkpoints with a decreased number of trainers
+#python -m paddle.distributed.launch \
+#    --log_dir='mylog' \
+#    --selected_gpus="0,1" \
+#    --started_port="12345" \
+#    do_train.py \
+#    --model_save_dir="./saved_model" \
+#    --checkpoint_dir="./saved_model/0" \
+#    --data_dir="./data" \
+#    --num_epochs=2 \
+#    --loss_type='dist_softmax'
+
+## case 8: save models to hdfs
+#python -m paddle.distributed.launch \
+#    --log_dir='mylog' \
+#    --selected_gpus="0,1" \
+#    --started_port="12345" \
+#    do_train.py \
+#    --model_save_dir="./saved_model" \
+#    --data_dir="./data" \
+#    --fs_name=${FS_NAME} \
+#    --fs_ugi=${FS_UGI} \
+#    --fs_dir_save="/user/paddle/lilong/models/saved_model2" \
+#    --num_epochs=2 \
+#    --loss_type='dist_softmax'
+
+## case 9: get models from hdfs
+#python -m paddle.distributed.launch \
+#    --log_dir='mylog' \
+#    --selected_gpus="0,1" \
+#    --started_port="12345" \
+#    do_train.py \
+#    --checkpoint_dir="./saved_model/" \
+#    --data_dir="./data" \
+#    --fs_name=${FS_NAME} \
+#    --fs_ugi=${FS_UGI} \
+#    --fs_dir_load="/user/paddle/lilong/models/saved_model/0" \
+#    --num_epochs=2 \
+#    --loss_type='dist_softmax'
+
+## case 10: get models from hdfs and save models to hdfs
+#python3 -m paddle.distributed.launch \
+#    --log_dir='mylog' \
+#    --selected_gpus="0,1" \
+#    --started_port="12345" \
+#    do_train.py \
+#    --checkpoint_dir="./saved_model/" \
+#    --data_dir="./data" \
+#    --fs_name=${FS_NAME} \
+#    --fs_ugi=${FS_UGI} \
+#    --fs_dir_load="/user/paddle/lilong/models/saved_model/0" \
+#    --fs_dir_save="/user/paddle/lilong/models/saved_model2" \
+#    --num_epochs=2 \
+#    --loss_type='dist_softmax'
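Note: the HDFS cases (8-10) in test_train.sh expect FS_NAME and FS_UGI in the environment; the values below are placeholders for illustration only:

    export FS_NAME="afs://example-fs.example.com:9902"
    export FS_UGI="username,password"
    bash test_train.sh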