提交 6750d1db 编写于 作者: X xiaosang 提交者: Kaipeng Deng

add ce for PointNet++ (#4218)

上级 5c8c880f
......@@ -21,6 +21,7 @@ import ast
import logging
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.framework as framework
from models import *
from data.modelnet40_reader import ModelNet40ClsReader
......@@ -111,6 +112,11 @@ def parse_args():
type=int,
default=1,
help='mini-batch interval for logging.')
parser.add_argument(
'--enable_ce',
action='store_true',
help='The flag indicating whether to run the task '
'for continuous evaluation.')
args = parser.parse_args()
return args
......@@ -128,6 +134,11 @@ def train():
"--model can only be 'MSG' or 'SSG'"
# build model
if args.enable_ce:
SEED = 102
fluid.default_main_program().random_seed = SEED
framework.default_startup_program().random_seed = SEED
startup = fluid.Program()
train_prog = fluid.Program()
with fluid.program_guard(train_prog, startup):
......@@ -209,6 +220,10 @@ def train():
train_stat = Stat()
test_stat = Stat()
ce_time = 0
ce_loss = []
for epoch_id in range(args.epoch):
try:
train_pyreader.start()
......@@ -224,13 +239,17 @@ def train():
log_str = ""
for name, values in zip(train_keys + ['learning_rate'], train_outs):
log_str += "{}: {:.5f}, ".format(name, np.mean(values))
if name == 'loss':
ce_loss.append(np.mean(values))
logger.info("[TRAIN] Epoch {}, batch {}: {}time: {:.2f}".format(epoch_id, train_iter, log_str, period))
train_iter += 1
except fluid.core.EOFException:
logger.info("[TRAIN] Epoch {} finished, {}average time: {:.2f}".format(epoch_id, train_stat.get_mean_log(), np.mean(train_periods[1:])))
ce_time = np.mean(train_periods[1:])
save_model(exe, train_prog, os.path.join(args.save_dir, str(epoch_id)))
# evaluation
if not args.enable_ce:
try:
test_pyreader.start()
test_iter = 0
......@@ -259,6 +278,25 @@ def train():
train_stat.reset()
train_periods = []
# only for ce
if args.enable_ce:
card_num = get_cards()
_loss = 0
_time = 0
try:
_time = ce_time
_loss = np.mean(ce_loss[1:])
except:
print("ce info error")
print("kpis\ttrain_cls_%s_duration_card%s\t%s" % (args.model, card_num, _time))
print("kpis\ttrain_cls_%s_loss_card%s\t%f" % (args.model, card_num, _loss))
def get_cards():
num = 0
cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cards != '':
num = len(cards.split(","))
return num
if __name__ == "__main__":
train()
......@@ -21,6 +21,7 @@ import ast
import logging
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.framework as framework
from models import *
from data.indoor3d_reader import Indoor3DReader
......@@ -110,6 +111,11 @@ def parse_args():
type=int,
default=1,
help='mini-batch interval for logging.')
parser.add_argument(
'--enable_ce',
action='store_true',
help='The flag indicating whether to run the task '
'for continuous evaluation.')
args = parser.parse_args()
return args
......@@ -127,6 +133,11 @@ def train():
"--model can only be 'MSG' or 'SSG'"
# build model
if args.enable_ce:
SEED = 102
fluid.default_main_program().random_seed = SEED
framework.default_startup_program().random_seed = SEED
startup = fluid.Program()
train_prog = fluid.Program()
with fluid.program_guard(train_prog, startup):
......@@ -199,6 +210,10 @@ def train():
train_stat = Stat()
test_stat = Stat()
ce_time = 0
ce_loss = []
for epoch_id in range(args.epoch):
try:
train_pyreader.start()
......@@ -214,13 +229,17 @@ def train():
log_str = ""
for name, values in zip(train_keys + ['learning_rate'], train_outs):
log_str += "{}: {:.5f}, ".format(name, np.mean(values))
if name == 'loss':
ce_loss.append(np.mean(values))
logger.info("[TRAIN] Epoch {}, batch {}: {}time: {:.2f}".format(epoch_id, train_iter, log_str, period))
train_iter += 1
except fluid.core.EOFException:
logger.info("[TRAIN] Epoch {} finished, {}average time: {:.2f}".format(epoch_id, train_stat.get_mean_log(), np.mean(train_periods[1:])))
ce_time = np.mean(train_periods[1:])
save_model(exe, train_prog, os.path.join(args.save_dir, str(epoch_id)))
# evaluation
if not args.enable_ce:
try:
test_pyreader.start()
test_iter = 0
......@@ -249,6 +268,25 @@ def train():
train_stat.reset()
train_periods = []
# only for ce
if args.enable_ce:
card_num = get_cards()
_loss = 0
_time = 0
try:
_time = ce_time
_loss = np.mean(ce_loss[1:])
except:
print("ce info error")
print("kpis\ttrain_seg_%s_duration_card%s\t%s" % (args.model, card_num, _time))
print("kpis\ttrain_seg_%s_loss_card%s\t%f" % (args.model, card_num, _loss))
def get_cards():
num = 0
cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cards != '':
num = len(cards.split(","))
return num
if __name__ == "__main__":
train()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册