提交 6750d1db 编写于 作者: X xiaosang 提交者: Kaipeng Deng

add ce for PointNet++ (#4218)

上级 5c8c880f
...@@ -21,6 +21,7 @@ import ast ...@@ -21,6 +21,7 @@ import ast
import logging import logging
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.framework as framework
from models import * from models import *
from data.modelnet40_reader import ModelNet40ClsReader from data.modelnet40_reader import ModelNet40ClsReader
...@@ -111,6 +112,11 @@ def parse_args(): ...@@ -111,6 +112,11 @@ def parse_args():
type=int, type=int,
default=1, default=1,
help='mini-batch interval for logging.') help='mini-batch interval for logging.')
parser.add_argument(
'--enable_ce',
action='store_true',
help='The flag indicating whether to run the task '
'for continuous evaluation.')
args = parser.parse_args() args = parser.parse_args()
return args return args
...@@ -128,6 +134,11 @@ def train(): ...@@ -128,6 +134,11 @@ def train():
"--model can only be 'MSG' or 'SSG'" "--model can only be 'MSG' or 'SSG'"
# build model # build model
if args.enable_ce:
SEED = 102
fluid.default_main_program().random_seed = SEED
framework.default_startup_program().random_seed = SEED
startup = fluid.Program() startup = fluid.Program()
train_prog = fluid.Program() train_prog = fluid.Program()
with fluid.program_guard(train_prog, startup): with fluid.program_guard(train_prog, startup):
...@@ -209,6 +220,10 @@ def train(): ...@@ -209,6 +220,10 @@ def train():
train_stat = Stat() train_stat = Stat()
test_stat = Stat() test_stat = Stat()
ce_time = 0
ce_loss = []
for epoch_id in range(args.epoch): for epoch_id in range(args.epoch):
try: try:
train_pyreader.start() train_pyreader.start()
...@@ -224,13 +239,17 @@ def train(): ...@@ -224,13 +239,17 @@ def train():
log_str = "" log_str = ""
for name, values in zip(train_keys + ['learning_rate'], train_outs): for name, values in zip(train_keys + ['learning_rate'], train_outs):
log_str += "{}: {:.5f}, ".format(name, np.mean(values)) log_str += "{}: {:.5f}, ".format(name, np.mean(values))
if name == 'loss':
ce_loss.append(np.mean(values))
logger.info("[TRAIN] Epoch {}, batch {}: {}time: {:.2f}".format(epoch_id, train_iter, log_str, period)) logger.info("[TRAIN] Epoch {}, batch {}: {}time: {:.2f}".format(epoch_id, train_iter, log_str, period))
train_iter += 1 train_iter += 1
except fluid.core.EOFException: except fluid.core.EOFException:
logger.info("[TRAIN] Epoch {} finished, {}average time: {:.2f}".format(epoch_id, train_stat.get_mean_log(), np.mean(train_periods[1:]))) logger.info("[TRAIN] Epoch {} finished, {}average time: {:.2f}".format(epoch_id, train_stat.get_mean_log(), np.mean(train_periods[1:])))
ce_time = np.mean(train_periods[1:])
save_model(exe, train_prog, os.path.join(args.save_dir, str(epoch_id))) save_model(exe, train_prog, os.path.join(args.save_dir, str(epoch_id)))
# evaluation # evaluation
if not args.enable_ce:
try: try:
test_pyreader.start() test_pyreader.start()
test_iter = 0 test_iter = 0
...@@ -259,6 +278,25 @@ def train(): ...@@ -259,6 +278,25 @@ def train():
train_stat.reset() train_stat.reset()
train_periods = [] train_periods = []
# only for ce
if args.enable_ce:
card_num = get_cards()
_loss = 0
_time = 0
try:
_time = ce_time
_loss = np.mean(ce_loss[1:])
except:
print("ce info error")
print("kpis\ttrain_cls_%s_duration_card%s\t%s" % (args.model, card_num, _time))
print("kpis\ttrain_cls_%s_loss_card%s\t%f" % (args.model, card_num, _loss))
def get_cards():
    """Return the number of GPU cards listed in ``CUDA_VISIBLE_DEVICES``.

    The environment variable is read as a comma-separated list. Blank
    entries (from an unset, empty or whitespace-only value, or from a stray
    leading/trailing comma) are not counted, so the result is 0 when no
    device is listed.

    Returns:
        int: number of non-blank entries in ``CUDA_VISIBLE_DEVICES``.
    """
    cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
    # Skip blank tokens so values such as "0,1," or " " are not over-counted.
    return sum(1 for card in cards.split(',') if card.strip())
if __name__ == "__main__": if __name__ == "__main__":
train() train()
...@@ -21,6 +21,7 @@ import ast ...@@ -21,6 +21,7 @@ import ast
import logging import logging
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.framework as framework
from models import * from models import *
from data.indoor3d_reader import Indoor3DReader from data.indoor3d_reader import Indoor3DReader
...@@ -110,6 +111,11 @@ def parse_args(): ...@@ -110,6 +111,11 @@ def parse_args():
type=int, type=int,
default=1, default=1,
help='mini-batch interval for logging.') help='mini-batch interval for logging.')
parser.add_argument(
'--enable_ce',
action='store_true',
help='The flag indicating whether to run the task '
'for continuous evaluation.')
args = parser.parse_args() args = parser.parse_args()
return args return args
...@@ -127,6 +133,11 @@ def train(): ...@@ -127,6 +133,11 @@ def train():
"--model can only be 'MSG' or 'SSG'" "--model can only be 'MSG' or 'SSG'"
# build model # build model
if args.enable_ce:
SEED = 102
fluid.default_main_program().random_seed = SEED
framework.default_startup_program().random_seed = SEED
startup = fluid.Program() startup = fluid.Program()
train_prog = fluid.Program() train_prog = fluid.Program()
with fluid.program_guard(train_prog, startup): with fluid.program_guard(train_prog, startup):
...@@ -199,6 +210,10 @@ def train(): ...@@ -199,6 +210,10 @@ def train():
train_stat = Stat() train_stat = Stat()
test_stat = Stat() test_stat = Stat()
ce_time = 0
ce_loss = []
for epoch_id in range(args.epoch): for epoch_id in range(args.epoch):
try: try:
train_pyreader.start() train_pyreader.start()
...@@ -214,13 +229,17 @@ def train(): ...@@ -214,13 +229,17 @@ def train():
log_str = "" log_str = ""
for name, values in zip(train_keys + ['learning_rate'], train_outs): for name, values in zip(train_keys + ['learning_rate'], train_outs):
log_str += "{}: {:.5f}, ".format(name, np.mean(values)) log_str += "{}: {:.5f}, ".format(name, np.mean(values))
if name == 'loss':
ce_loss.append(np.mean(values))
logger.info("[TRAIN] Epoch {}, batch {}: {}time: {:.2f}".format(epoch_id, train_iter, log_str, period)) logger.info("[TRAIN] Epoch {}, batch {}: {}time: {:.2f}".format(epoch_id, train_iter, log_str, period))
train_iter += 1 train_iter += 1
except fluid.core.EOFException: except fluid.core.EOFException:
logger.info("[TRAIN] Epoch {} finished, {}average time: {:.2f}".format(epoch_id, train_stat.get_mean_log(), np.mean(train_periods[1:]))) logger.info("[TRAIN] Epoch {} finished, {}average time: {:.2f}".format(epoch_id, train_stat.get_mean_log(), np.mean(train_periods[1:])))
ce_time = np.mean(train_periods[1:])
save_model(exe, train_prog, os.path.join(args.save_dir, str(epoch_id))) save_model(exe, train_prog, os.path.join(args.save_dir, str(epoch_id)))
# evaluation # evaluation
if not args.enable_ce:
try: try:
test_pyreader.start() test_pyreader.start()
test_iter = 0 test_iter = 0
...@@ -249,6 +268,25 @@ def train(): ...@@ -249,6 +268,25 @@ def train():
train_stat.reset() train_stat.reset()
train_periods = [] train_periods = []
# only for ce
if args.enable_ce:
card_num = get_cards()
_loss = 0
_time = 0
try:
_time = ce_time
_loss = np.mean(ce_loss[1:])
except:
print("ce info error")
print("kpis\ttrain_seg_%s_duration_card%s\t%s" % (args.model, card_num, _time))
print("kpis\ttrain_seg_%s_loss_card%s\t%f" % (args.model, card_num, _loss))
def get_cards():
    """Return the number of GPU cards listed in ``CUDA_VISIBLE_DEVICES``.

    The environment variable is read as a comma-separated list. Blank
    entries (from an unset, empty or whitespace-only value, or from a stray
    leading/trailing comma) are not counted, so the result is 0 when no
    device is listed.

    Returns:
        int: number of non-blank entries in ``CUDA_VISIBLE_DEVICES``.
    """
    cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
    # Skip blank tokens so values such as "0,1," or " " are not over-counted.
    return sum(1 for card in cards.split(',') if card.strip())
if __name__ == "__main__": if __name__ == "__main__":
train() train()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册