Unverified commit f84a7383, authored by chenjian, committed by GitHub

support zero dim tensor (#2116)

Co-authored-by: wuzewu <wuzewu@baidu.com>
Parent: 161f5814
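The substantive change in this commit swaps loss.numpy()[0] and value.numpy()[0] for float(loss) and float(value) when accumulating loss and metric values; the remaining hunks are import reordering and argument re-wrapping. Newer Paddle releases return zero-dimensional (0-D) tensors from scalar reductions, and indexing the converted NumPy array with [0] then raises an IndexError, while float() handles both 0-D and shape-[1] tensors. A minimal sketch of the difference (exactly which Paddle version starts returning 0-D tensors is an assumption here, not stated in the diff):

    import paddle

    # With zero-dim support, scalar reductions yield a 0-D tensor.
    loss = paddle.mean(paddle.to_tensor([0.5, 1.5]))

    print(float(loss))       # 1.0 -- works whether loss has shape [] or shape [1]
    # loss.numpy()[0]        # IndexError once loss is a true 0-D tensor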
@@ -9,17 +9,19 @@
 Authors: lvhaijun01@baidu.com
 Date: 2020-11-24 20:46
 """
-from paddlehub.finetune.trainer import Trainer
 import os
 from collections import defaultdict
 import paddle
 from paddle.distributed import ParallelEnv
+from paddlehub.finetune.trainer import Trainer
 from paddlehub.utils.log import logger
 from paddlehub.utils.utils import Timer

 class CustomTrainer(Trainer):
     def __init__(self, **kwargs) -> None:
         super(CustomTrainer, self).__init__(**kwargs)
@@ -39,10 +41,15 @@ class CustomTrainer(Trainer):
         place = paddle.CUDAPlace(ParallelEnv().dev_id) if use_gpu else paddle.CPUPlace()
         paddle.disable_static(place)
-        batch_sampler = paddle.io.DistributedBatchSampler(
-            train_dataset, batch_size=batch_size, shuffle=True, drop_last=False)
-        loader = paddle.io.DataLoader(
-            train_dataset, batch_sampler=batch_sampler, places=place, num_workers=num_workers, return_list=True)
+        batch_sampler = paddle.io.DistributedBatchSampler(train_dataset,
+                                                          batch_size=batch_size,
+                                                          shuffle=True,
+                                                          drop_last=False)
+        loader = paddle.io.DataLoader(train_dataset,
+                                      batch_sampler=batch_sampler,
+                                      places=place,
+                                      num_workers=num_workers,
+                                      return_list=True)
         return batch_sampler, loader

     def train_one_epoch(self, loader: paddle.io.DataLoader, timer: Timer, current_epoch: int, epochs: int,
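For context, a hedged sketch of how a DistributedBatchSampler/DataLoader pair like the one built above is typically consumed in a training loop; the ToyDataset and epoch count are made-up placeholders, not code from this repository. Calling set_epoch reshuffles the data differently on each epoch in multi-card runs:

    import paddle

    class ToyDataset(paddle.io.Dataset):
        # Hypothetical dataset: 100 samples of (feature, label).
        def __len__(self):
            return 100

        def __getitem__(self, idx):
            return paddle.to_tensor([float(idx)]), paddle.to_tensor([idx % 2])

    dataset = ToyDataset()
    batch_sampler = paddle.io.DistributedBatchSampler(dataset, batch_size=16, shuffle=True, drop_last=False)
    loader = paddle.io.DataLoader(dataset, batch_sampler=batch_sampler, return_list=True)

    for epoch in range(2):
        batch_sampler.set_epoch(epoch)   # vary shuffling across epochs on multi-card runs
        for features, labels in loader:
            pass                         # forward/backward would go here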
@@ -57,9 +64,9 @@ class CustomTrainer(Trainer):
             self.optimizer_zero_grad(current_epoch, batch_idx, self.optimizer)

             # calculate metrics and loss
-            avg_loss += loss.numpy()[0]
+            avg_loss += float(loss)
             for metric, value in metrics.items():
-                avg_metrics[metric] += value.numpy()[0]
+                avg_metrics[metric] += float(value)

             timer.count()
@@ -127,8 +134,9 @@ class CustomTrainer(Trainer):
                        self.log_writer.add_scalar(tag='EVAL/loss', step=timer.current_step, value=eval_loss)
                        for metric, value in eval_metrics.items():
-                            self.log_writer.add_scalar(
-                                tag='EVAL/{}'.format(metric), step=timer.current_step, value=value)
+                            self.log_writer.add_scalar(tag='EVAL/{}'.format(metric),
+                                                       step=timer.current_step,
+                                                       value=value)

                    if not self.best_metrics or self.compare_metrics(self.best_metrics, eval_metrics):
                        self.best_metrics = eval_metrics
@@ -147,11 +155,16 @@ class CustomTrainer(Trainer):
         place = paddle.CUDAPlace(ParallelEnv().dev_id) if use_gpu else paddle.CPUPlace()
         paddle.disable_static(place)
-        batch_sampler = paddle.io.DistributedBatchSampler(
-            eval_dataset, batch_size=batch_size, shuffle=False, drop_last=False)
-        loader = paddle.io.DataLoader(
-            eval_dataset, batch_sampler=batch_sampler, places=place, num_workers=num_workers, return_list=True)
+        batch_sampler = paddle.io.DistributedBatchSampler(eval_dataset,
+                                                          batch_size=batch_size,
+                                                          shuffle=False,
+                                                          drop_last=False)
+        loader = paddle.io.DataLoader(eval_dataset,
+                                      batch_sampler=batch_sampler,
+                                      places=place,
+                                      num_workers=num_workers,
+                                      return_list=True)
         return loader

     def evaluate_process(self, loader: paddle.io.DataLoader) -> dict:
@@ -168,10 +181,10 @@ class CustomTrainer(Trainer):
                num_samples += bs
                if loss:
-                    avg_loss += loss.numpy()[0] * bs
+                    avg_loss += float(loss) * bs
                for metric, value in metrics.items():
-                    sum_metrics[metric] += value.numpy()[0] * bs
+                    sum_metrics[metric] += float(value) * bs

        # print avg metrics and loss
        print_msg = '[Evaluation result]'
...
@@ -11,15 +11,17 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
 import pickle
 import time
 from collections import defaultdict
-from typing import Any, Callable, Generic, List
+from typing import Any
+from typing import Callable
+from typing import Generic
+from typing import List

-import paddle
 import numpy as np
+import paddle
 from visualdl import LogWriter

 from paddlehub.utils.log import logger
@@ -188,15 +190,16 @@ class Trainer(object):
            if not hasattr(model, 'validation_step'):
                raise NotImplementedError('The specified finetuning model does not support evaluation.')

-        batch_sampler = paddle.io.DistributedBatchSampler(
-            train_dataset, batch_size=batch_size, shuffle=True, drop_last=False)
-        loader = paddle.io.DataLoader(
-            train_dataset,
-            batch_sampler=batch_sampler,
-            num_workers=num_workers,
-            return_list=True,
-            use_buffer_reader=True,
-            collate_fn=collate_fn)
+        batch_sampler = paddle.io.DistributedBatchSampler(train_dataset,
+                                                          batch_size=batch_size,
+                                                          shuffle=True,
+                                                          drop_last=False)
+        loader = paddle.io.DataLoader(train_dataset,
+                                      batch_sampler=batch_sampler,
+                                      num_workers=num_workers,
+                                      return_list=True,
+                                      use_buffer_reader=True,
+                                      collate_fn=collate_fn)

        steps_per_epoch = len(batch_sampler)
        timer = Timer(steps_per_epoch * epochs)
@@ -214,7 +217,7 @@ class Trainer(object):
                self.optimizer_zero_grad(self.current_epoch, batch_idx, self.optimizer)

                # calculate metrics and loss
-                avg_loss += loss.numpy()[0]
+                avg_loss += float(loss)
                for metric, value in metrics.items():
                    if isinstance(value, paddle.Tensor):
                        value = value.numpy()
@@ -235,8 +238,9 @@ class Trainer(object):
                    for metric, value in avg_metrics.items():
                        value /= log_interval
                        if self.use_vdl:
-                            self.log_writer.add_scalar(
-                                tag='TRAIN/{}'.format(metric), step=timer.current_step, value=value)
+                            self.log_writer.add_scalar(tag='TRAIN/{}'.format(metric),
+                                                       step=timer.current_step,
+                                                       value=value)
                        if isinstance(value, np.ndarray):
                            value = value.item()
                        print_msg += ' {}={:.4f}'.format(metric, value)
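The TRAIN/* and EVAL/* scalars written above can be browsed with VisualDL. A minimal standalone sketch of the same add_scalar call; the log directory and loss values are placeholders:

    from visualdl import LogWriter

    with LogWriter(logdir='./visualdl_log') as writer:
        for step, loss in enumerate([0.9, 0.7, 0.5]):
            writer.add_scalar(tag='TRAIN/loss', step=step, value=loss)

    # Inspect with: visualdl --logdir ./visualdl_log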
@@ -258,8 +262,9 @@ class Trainer(object):
                        self.log_writer.add_scalar(tag='EVAL/loss', step=timer.current_step, value=eval_loss)
                        for metric, value in eval_metrics.items():
-                            self.log_writer.add_scalar(
-                                tag='EVAL/{}'.format(metric), step=timer.current_step, value=value)
+                            self.log_writer.add_scalar(tag='EVAL/{}'.format(metric),
+                                                       step=timer.current_step,
+                                                       value=value)

                    if not self.best_metrics or self.compare_metrics(self.best_metrics, eval_metrics):
                        self.best_metrics = eval_metrics
@@ -293,15 +298,14 @@ class Trainer(object):
        if self.local_rank == 0:
            batch_sampler = paddle.io.BatchSampler(eval_dataset, batch_size=batch_size, shuffle=False, drop_last=False)
-            loader = paddle.io.DataLoader(
-                eval_dataset,
-                batch_sampler=batch_sampler,
-                num_workers=num_workers,
-                return_list=True,
-                collate_fn=collate_fn)
+            loader = paddle.io.DataLoader(eval_dataset,
+                                          batch_sampler=batch_sampler,
+                                          num_workers=num_workers,
+                                          return_list=True,
+                                          collate_fn=collate_fn)

            self.model.eval()
            avg_loss = num_samples = 0
            sum_metrics = defaultdict(int)
            avg_metrics = defaultdict(int)
@@ -317,7 +321,7 @@ class Trainer(object):
                    num_samples += bs
                    if loss:
-                        avg_loss += loss.numpy()[0] * bs
+                        avg_loss += float(loss) * bs
                    for metric, value in metrics.items():
                        sum_metrics[metric] += value * bs
...