提交 6750d1db 编写于 作者: X xiaosang 提交者: Kaipeng Deng

Add continuous evaluation (CE) support for PointNet++ (#4218)

上级 5c8c880f
...@@ -21,6 +21,7 @@ import ast ...@@ -21,6 +21,7 @@ import ast
import logging import logging
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.framework as framework
from models import * from models import *
from data.modelnet40_reader import ModelNet40ClsReader from data.modelnet40_reader import ModelNet40ClsReader
...@@ -111,6 +112,11 @@ def parse_args(): ...@@ -111,6 +112,11 @@ def parse_args():
type=int, type=int,
default=1, default=1,
help='mini-batch interval for logging.') help='mini-batch interval for logging.')
parser.add_argument(
'--enable_ce',
action='store_true',
help='The flag indicating whether to run the task '
'for continuous evaluation.')
args = parser.parse_args() args = parser.parse_args()
return args return args
...@@ -128,6 +134,11 @@ def train(): ...@@ -128,6 +134,11 @@ def train():
"--model can only be 'MSG' or 'SSG'" "--model can only be 'MSG' or 'SSG'"
# build model # build model
if args.enable_ce:
SEED = 102
fluid.default_main_program().random_seed = SEED
framework.default_startup_program().random_seed = SEED
startup = fluid.Program() startup = fluid.Program()
train_prog = fluid.Program() train_prog = fluid.Program()
with fluid.program_guard(train_prog, startup): with fluid.program_guard(train_prog, startup):
...@@ -209,6 +220,10 @@ def train(): ...@@ -209,6 +220,10 @@ def train():
train_stat = Stat() train_stat = Stat()
test_stat = Stat() test_stat = Stat()
ce_time = 0
ce_loss = []
for epoch_id in range(args.epoch): for epoch_id in range(args.epoch):
try: try:
train_pyreader.start() train_pyreader.start()
...@@ -224,41 +239,64 @@ def train(): ...@@ -224,41 +239,64 @@ def train():
log_str = "" log_str = ""
for name, values in zip(train_keys + ['learning_rate'], train_outs): for name, values in zip(train_keys + ['learning_rate'], train_outs):
log_str += "{}: {:.5f}, ".format(name, np.mean(values)) log_str += "{}: {:.5f}, ".format(name, np.mean(values))
if name == 'loss':
ce_loss.append(np.mean(values))
logger.info("[TRAIN] Epoch {}, batch {}: {}time: {:.2f}".format(epoch_id, train_iter, log_str, period)) logger.info("[TRAIN] Epoch {}, batch {}: {}time: {:.2f}".format(epoch_id, train_iter, log_str, period))
train_iter += 1 train_iter += 1
except fluid.core.EOFException: except fluid.core.EOFException:
logger.info("[TRAIN] Epoch {} finished, {}average time: {:.2f}".format(epoch_id, train_stat.get_mean_log(), np.mean(train_periods[1:]))) logger.info("[TRAIN] Epoch {} finished, {}average time: {:.2f}".format(epoch_id, train_stat.get_mean_log(), np.mean(train_periods[1:])))
ce_time = np.mean(train_periods[1:])
save_model(exe, train_prog, os.path.join(args.save_dir, str(epoch_id))) save_model(exe, train_prog, os.path.join(args.save_dir, str(epoch_id)))
# evaluation # evaluation
try: if not args.enable_ce:
test_pyreader.start() try:
test_iter = 0 test_pyreader.start()
test_periods = [] test_iter = 0
while True: test_periods = []
cur_time = time.time() while True:
test_outs = exe.run(test_compile_prog, fetch_list=test_values) cur_time = time.time()
period = time.time() - cur_time test_outs = exe.run(test_compile_prog, fetch_list=test_values)
test_periods.append(period) period = time.time() - cur_time
test_stat.update(test_keys, test_outs) test_periods.append(period)
if test_iter % args.log_interval == 0: test_stat.update(test_keys, test_outs)
log_str = "" if test_iter % args.log_interval == 0:
for name, value in zip(test_keys, test_outs): log_str = ""
log_str += "{}: {:.4f}, ".format(name, np.mean(value)) for name, value in zip(test_keys, test_outs):
logger.info("[TEST] Epoch {}, batch {}: {}time: {:.2f}".format(epoch_id, test_iter, log_str, period)) log_str += "{}: {:.4f}, ".format(name, np.mean(value))
test_iter += 1 logger.info("[TEST] Epoch {}, batch {}: {}time: {:.2f}".format(epoch_id, test_iter, log_str, period))
except fluid.core.EOFException: test_iter += 1
logger.info("[TEST] Epoch {} finished, {}average time: {:.2f}".format(epoch_id, test_stat.get_mean_log(), np.mean(test_periods[1:]))) except fluid.core.EOFException:
finally: logger.info("[TEST] Epoch {} finished, {}average time: {:.2f}".format(epoch_id, test_stat.get_mean_log(), np.mean(test_periods[1:])))
test_pyreader.reset() finally:
test_stat.reset() test_pyreader.reset()
test_periods = [] test_stat.reset()
test_periods = []
finally: finally:
train_pyreader.reset() train_pyreader.reset()
train_stat.reset() train_stat.reset()
train_periods = [] train_periods = []
# only for ce
if args.enable_ce:
card_num = get_cards()
_loss = 0
_time = 0
try:
_time = ce_time
_loss = np.mean(ce_loss[1:])
except:
print("ce info error")
print("kpis\ttrain_cls_%s_duration_card%s\t%s" % (args.model, card_num, _time))
print("kpis\ttrain_cls_%s_loss_card%s\t%f" % (args.model, card_num, _loss))
def get_cards():
    """Return the number of GPU cards listed in ``CUDA_VISIBLE_DEVICES``.

    The environment variable is read as a comma-separated list of device
    ids. Blank entries (for example from a trailing comma or stray
    whitespace) are skipped so they do not inflate the card count that is
    embedded in the continuous-evaluation KPI names.

    Returns:
        int: Number of non-empty device entries; 0 when the variable is
        unset or empty.
    """
    cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
    # ''.split(',') yields [''], which the strip() filter discards, so an
    # unset or empty variable naturally counts as zero cards.
    return sum(1 for card in cards.split(',') if card.strip())
if __name__ == "__main__": if __name__ == "__main__":
train() train()
...@@ -21,6 +21,7 @@ import ast ...@@ -21,6 +21,7 @@ import ast
import logging import logging
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.framework as framework
from models import * from models import *
from data.indoor3d_reader import Indoor3DReader from data.indoor3d_reader import Indoor3DReader
...@@ -110,6 +111,11 @@ def parse_args(): ...@@ -110,6 +111,11 @@ def parse_args():
type=int, type=int,
default=1, default=1,
help='mini-batch interval for logging.') help='mini-batch interval for logging.')
parser.add_argument(
'--enable_ce',
action='store_true',
help='The flag indicating whether to run the task '
'for continuous evaluation.')
args = parser.parse_args() args = parser.parse_args()
return args return args
...@@ -127,6 +133,11 @@ def train(): ...@@ -127,6 +133,11 @@ def train():
"--model can only be 'MSG' or 'SSG'" "--model can only be 'MSG' or 'SSG'"
# build model # build model
if args.enable_ce:
SEED = 102
fluid.default_main_program().random_seed = SEED
framework.default_startup_program().random_seed = SEED
startup = fluid.Program() startup = fluid.Program()
train_prog = fluid.Program() train_prog = fluid.Program()
with fluid.program_guard(train_prog, startup): with fluid.program_guard(train_prog, startup):
...@@ -199,6 +210,10 @@ def train(): ...@@ -199,6 +210,10 @@ def train():
train_stat = Stat() train_stat = Stat()
test_stat = Stat() test_stat = Stat()
ce_time = 0
ce_loss = []
for epoch_id in range(args.epoch): for epoch_id in range(args.epoch):
try: try:
train_pyreader.start() train_pyreader.start()
...@@ -214,41 +229,64 @@ def train(): ...@@ -214,41 +229,64 @@ def train():
log_str = "" log_str = ""
for name, values in zip(train_keys + ['learning_rate'], train_outs): for name, values in zip(train_keys + ['learning_rate'], train_outs):
log_str += "{}: {:.5f}, ".format(name, np.mean(values)) log_str += "{}: {:.5f}, ".format(name, np.mean(values))
if name == 'loss':
ce_loss.append(np.mean(values))
logger.info("[TRAIN] Epoch {}, batch {}: {}time: {:.2f}".format(epoch_id, train_iter, log_str, period)) logger.info("[TRAIN] Epoch {}, batch {}: {}time: {:.2f}".format(epoch_id, train_iter, log_str, period))
train_iter += 1 train_iter += 1
except fluid.core.EOFException: except fluid.core.EOFException:
logger.info("[TRAIN] Epoch {} finished, {}average time: {:.2f}".format(epoch_id, train_stat.get_mean_log(), np.mean(train_periods[1:]))) logger.info("[TRAIN] Epoch {} finished, {}average time: {:.2f}".format(epoch_id, train_stat.get_mean_log(), np.mean(train_periods[1:])))
ce_time = np.mean(train_periods[1:])
save_model(exe, train_prog, os.path.join(args.save_dir, str(epoch_id))) save_model(exe, train_prog, os.path.join(args.save_dir, str(epoch_id)))
# evaluation # evaluation
try: if not args.enable_ce:
test_pyreader.start() try:
test_iter = 0 test_pyreader.start()
test_periods = [] test_iter = 0
while True: test_periods = []
cur_time = time.time() while True:
test_outs = exe.run(test_compile_prog, fetch_list=test_values) cur_time = time.time()
period = time.time() - cur_time test_outs = exe.run(test_compile_prog, fetch_list=test_values)
test_periods.append(period) period = time.time() - cur_time
test_stat.update(test_keys, test_outs) test_periods.append(period)
if test_iter % args.log_interval == 0: test_stat.update(test_keys, test_outs)
log_str = "" if test_iter % args.log_interval == 0:
for name, value in zip(test_keys, test_outs): log_str = ""
log_str += "{}: {:.4f}, ".format(name, np.mean(value)) for name, value in zip(test_keys, test_outs):
logger.info("[TEST] Epoch {}, batch {}: {}time: {:.2f}".format(epoch_id, test_iter, log_str, period)) log_str += "{}: {:.4f}, ".format(name, np.mean(value))
test_iter += 1 logger.info("[TEST] Epoch {}, batch {}: {}time: {:.2f}".format(epoch_id, test_iter, log_str, period))
except fluid.core.EOFException: test_iter += 1
logger.info("[TEST] Epoch {} finished, {}average time: {:.2f}".format(epoch_id, test_stat.get_mean_log(), np.mean(test_periods[1:]))) except fluid.core.EOFException:
finally: logger.info("[TEST] Epoch {} finished, {}average time: {:.2f}".format(epoch_id, test_stat.get_mean_log(), np.mean(test_periods[1:])))
test_pyreader.reset() finally:
test_stat.reset() test_pyreader.reset()
test_periods = [] test_stat.reset()
test_periods = []
finally: finally:
train_pyreader.reset() train_pyreader.reset()
train_stat.reset() train_stat.reset()
train_periods = [] train_periods = []
# only for ce
if args.enable_ce:
card_num = get_cards()
_loss = 0
_time = 0
try:
_time = ce_time
_loss = np.mean(ce_loss[1:])
except:
print("ce info error")
print("kpis\ttrain_seg_%s_duration_card%s\t%s" % (args.model, card_num, _time))
print("kpis\ttrain_seg_%s_loss_card%s\t%f" % (args.model, card_num, _loss))
def get_cards():
    """Count the GPU cards named in the ``CUDA_VISIBLE_DEVICES`` variable.

    The value is treated as a comma-separated list of device ids. Entries
    that are empty after stripping whitespace (such as the one produced by
    a trailing comma) are not counted, so the card number reported in the
    continuous-evaluation KPI lines is not overstated.

    Returns:
        int: Number of non-empty device entries; 0 when the variable is
        unset or empty.
    """
    visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
    # Splitting an empty string gives [''], which is filtered out below,
    # so the unset/empty case needs no special branch.
    return len([entry for entry in visible.split(',') if entry.strip()])
if __name__ == "__main__": if __name__ == "__main__":
train() train()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册