From c54887dd7243506e5fe7824114188335e9d937f1 Mon Sep 17 00:00:00 2001
From: kinghuin
Date: Mon, 21 Sep 2020 10:06:59 +0800
Subject: [PATCH] fix the windows bug in ernie_gen (#905)

---
 .../text/text_generation/ernie_gen/README.md       |  4 ++++
 .../text/text_generation/ernie_gen/module.py       |  7 ++++---
 .../ernie_gen/propeller/paddle/data/example.proto  |  4 ++--
 .../ernie_gen/propeller/paddle/data/feature.proto  |  2 +-
 .../propeller/paddle/data/feature_column.py        |  2 +-
 .../ernie_gen/propeller/paddle/train/hooks.py      |  8 ++++----
 .../ernie_gen/propeller/paddle/train/trainer.py    | 14 +++++++-------
 .../ernie_gen/propeller/service/server.py          |  4 ++--
 .../ernie_gen/propeller/tools/ckpt_inspector.py    |  1 -
 .../modules/text/text_generation/gpt2/__init__.py  |  0
 10 files changed, 25 insertions(+), 21 deletions(-)
 create mode 100644 hub_module/modules/text/text_generation/gpt2/__init__.py

diff --git a/hub_module/modules/text/text_generation/ernie_gen/README.md b/hub_module/modules/text/text_generation/ernie_gen/README.md
index c75f481c..fc3a08d3 100644
--- a/hub_module/modules/text/text_generation/ernie_gen/README.md
+++ b/hub_module/modules/text/text_generation/ernie_gen/README.md
@@ -184,3 +184,7 @@ paddlehub >= 1.7.0
 * 1.0.1
 
   修复模型导出bug
+
+* 1.0.2
+
+  修复windows运行中的bug
diff --git a/hub_module/modules/text/text_generation/ernie_gen/module.py b/hub_module/modules/text/text_generation/ernie_gen/module.py
index 6101655b..8f1e35cc 100644
--- a/hub_module/modules/text/text_generation/ernie_gen/module.py
+++ b/hub_module/modules/text/text_generation/ernie_gen/module.py
@@ -39,7 +39,7 @@ import ernie_gen.propeller.paddle as propeller
 
 @moduleinfo(
     name="ernie_gen",
-    version="1.0.1",
+    version="1.0.2",
     summary=
     "ERNIE-GEN is a multi-flow language generation framework for both pre-training and fine-tuning.",
     author="baidu",
@@ -371,10 +371,11 @@ class ErnieGen(hub.Module):
         src_ids = src_ids[:self.max_encode_len]
         tgt_ids = tgt_ids[:self.max_decode_len]
         src_ids, src_sids = self.tokenizer.build_for_ernie(src_ids)
-        src_pids = np.arange(len(src_ids))
+        src_pids = np.arange(len(src_ids), dtype=np.int64)
 
         tgt_ids, tgt_sids = self.tokenizer.build_for_ernie(tgt_ids)
-        tgt_pids = np.arange(len(tgt_ids)) + len(src_ids)  # continues position
+        tgt_pids = np.arange(
+            len(tgt_ids), dtype=np.int64) + len(src_ids)  # continues position
         tgt_sids = np.ones_like(tgt_sids)
         attn_ids = np.ones_like(tgt_ids) * self.tokenizer.vocab['[MASK]']
diff --git a/hub_module/modules/text/text_generation/ernie_gen/propeller/paddle/data/example.proto b/hub_module/modules/text/text_generation/ernie_gen/propeller/paddle/data/example.proto
index ba6da969..3c613917 100644
--- a/hub_module/modules/text/text_generation/ernie_gen/propeller/paddle/data/example.proto
+++ b/hub_module/modules/text/text_generation/ernie_gen/propeller/paddle/data/example.proto
@@ -16,8 +16,8 @@
 // model training or inference.
 
 syntax = "proto3";
-import "propeller/paddle/data/feature.proto";
-package propeller;
+import "ernie_gen.propeller/paddle/data/feature.proto";
+package ernie_gen.propeller;
 
 message Example {
   Features features = 1;
diff --git a/hub_module/modules/text/text_generation/ernie_gen/propeller/paddle/data/feature.proto b/hub_module/modules/text/text_generation/ernie_gen/propeller/paddle/data/feature.proto
index 564b66bc..aa0f2dbc 100644
--- a/hub_module/modules/text/text_generation/ernie_gen/propeller/paddle/data/feature.proto
+++ b/hub_module/modules/text/text_generation/ernie_gen/propeller/paddle/data/feature.proto
@@ -13,7 +13,7 @@
 // limitations under the License.
 
 syntax = "proto3";
-package propeller;
+package ernie_gen.propeller;
 
 message BytesList {
   repeated bytes value = 1;
diff --git a/hub_module/modules/text/text_generation/ernie_gen/propeller/paddle/data/feature_column.py b/hub_module/modules/text/text_generation/ernie_gen/propeller/paddle/data/feature_column.py
index 775eef13..0493b888 100644
--- a/hub_module/modules/text/text_generation/ernie_gen/propeller/paddle/data/feature_column.py
+++ b/hub_module/modules/text/text_generation/ernie_gen/propeller/paddle/data/feature_column.py
@@ -125,7 +125,7 @@ class LabelColumn(Column):
             ids = int(raw)
         else:
             ids = self.vocab[raw]
-        return ids
+        return np.array(ids, dtype=np.int64)
 
 
 class TextColumn(Column):
diff --git a/hub_module/modules/text/text_generation/ernie_gen/propeller/paddle/train/hooks.py b/hub_module/modules/text/text_generation/ernie_gen/propeller/paddle/train/hooks.py
index b15be46f..8cb559bf 100644
--- a/hub_module/modules/text/text_generation/ernie_gen/propeller/paddle/train/hooks.py
+++ b/hub_module/modules/text/text_generation/ernie_gen/propeller/paddle/train/hooks.py
@@ -73,7 +73,7 @@ class TqdmProgressBarHook(RunHook):
         """doc"""
         self.tqdm = None
         import tqdm
-        from propeller import log as main_log
+        from ernie_gen.propeller import log as main_log
         hdl = main_log.handlers[0]
 
         class _TqdmLogginHandler(logging.Handler):
@@ -110,7 +110,7 @@ class TqdmNotebookProgressBarHook(RunHook):
         """doc"""
         self.tqdm = None
         import tqdm
-        from propeller import log as main_log
+        from ernie_gen.propeller import log as main_log
         hdl = main_log.handlers[0]
 
         class _TqdmLogginHandler(logging.Handler):
@@ -144,7 +144,7 @@ class TqdmNotebookProgressBarHook(RunHook):
 
 
 class LoggingHook(RunHook):
-    """log tensor in to screan and tensorboard"""
+    """log tensor in to screan and VisualDL"""
 
     def __init__(self,
                  loss,
@@ -205,7 +205,7 @@ class LoggingHook(RunHook):
                 speed = -1.
             self.last_state = state
 
-            # log to tensorboard
+            # log to VisualDL
             if self.writer is not None:
                 self.writer.add_scalar('loss', loss, state.gstep)
                 for name, t in zip(self.s_name, s_np):
diff --git a/hub_module/modules/text/text_generation/ernie_gen/propeller/paddle/train/trainer.py b/hub_module/modules/text/text_generation/ernie_gen/propeller/paddle/train/trainer.py
index b1fa7d62..07ac795b 100644
--- a/hub_module/modules/text/text_generation/ernie_gen/propeller/paddle/train/trainer.py
+++ b/hub_module/modules/text/text_generation/ernie_gen/propeller/paddle/train/trainer.py
@@ -48,11 +48,11 @@ __all__ = ['train_and_eval', 'Learner']
 def _get_summary_writer(path):
     summary_writer = None
     try:
-        from tensorboardX import SummaryWriter
+        from visualdl import LogWriter
         if distribution.status.is_master:
-            summary_writer = SummaryWriter(os.path.join(path))
+            summary_writer = LogWriter(os.path.join(path))
     except ImportError:
-        log.warning('tensorboardX not installed, will not log to tensorboard')
+        log.warning('VisualDL not installed, will not log to VisualDL')
     return summary_writer
 
 
@@ -69,7 +69,7 @@ def _log_eval_result(name, eval_result, swriter, state):
             printable.append('{}\t{}'.format(n, val))
             if swriter is not None:
                 swriter.add_scalar(n, val, state.gstep)
-                log.debug('write to tensorboard %s' % swriter.logdir)
+                log.debug('write to VisualDL %s' % swriter.logdir)
 
     if len(printable):
         log.info('*** eval res: %10s ***' % name)
@@ -134,10 +134,10 @@ class Learner(object):
         if run_config.model_dir is None:
             raise ValueError('model_dir should specified in run_config')
 
-        if issubclass(model_class_or_model_fn, Model):
-            _model_fn = _build_model_fn(model_class_or_model_fn)
-        elif inspect.isfunction(model_class_or_model_fn):
+        if inspect.isfunction(model_class_or_model_fn):
             _model_fn = model_class_or_model_fn
+        elif issubclass(model_class_or_model_fn, Model):
+            _model_fn = _build_model_fn(model_class_or_model_fn)
         else:
             raise ValueError('unknown model %s' % model_class_or_model_fn)
diff --git a/hub_module/modules/text/text_generation/ernie_gen/propeller/service/server.py b/hub_module/modules/text/text_generation/ernie_gen/propeller/service/server.py
index 000a8c6a..b003cc80 100644
--- a/hub_module/modules/text/text_generation/ernie_gen/propeller/service/server.py
+++ b/hub_module/modules/text/text_generation/ernie_gen/propeller/service/server.py
@@ -71,8 +71,8 @@ def run_worker(model_dir, device_idx, endpoint="ipc://worker.ipc"):
             "CUDA_VISIBLE_DEVICES").split(",")[device_idx]
     log.debug('cuda_env %s' % os.environ["CUDA_VISIBLE_DEVICES"])
     import paddle.fluid as F
-    from propeller.service import interface_pb2
-    import propeller.service.utils as serv_utils
+    from ernie_gen.propeller.service import interface_pb2
+    import ernie_gen.propeller.service.utils as serv_utils
     context = zmq.Context()
     socket = context.socket(zmq.REP)
     socket.connect(endpoint)
diff --git a/hub_module/modules/text/text_generation/ernie_gen/propeller/tools/ckpt_inspector.py b/hub_module/modules/text/text_generation/ernie_gen/propeller/tools/ckpt_inspector.py
index e04f4eb8..be4eb1f7 100644
--- a/hub_module/modules/text/text_generation/ernie_gen/propeller/tools/ckpt_inspector.py
+++ b/hub_module/modules/text/text_generation/ernie_gen/propeller/tools/ckpt_inspector.py
@@ -26,7 +26,6 @@ import collections
 from distutils import dir_util
 import pickle
 
-#from utils import print_arguments
 import paddle.fluid as F
 from paddle.fluid.proto import framework_pb2
diff --git a/hub_module/modules/text/text_generation/gpt2/__init__.py b/hub_module/modules/text/text_generation/gpt2/__init__.py
new file mode 100644
index 00000000..e69de29b
--
GitLab
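
Note on the dtype changes in module.py and feature_column.py (illustrative commentary, not part of the patch): np.arange without an explicit dtype uses NumPy's default integer type, which follows the platform C long (32-bit on Windows, 64-bit on most Linux builds), while the id and position arrays fed to Paddle are expected to be int64. Pinning dtype=np.int64, and returning np.array(ids, dtype=np.int64) for labels, makes the arrays platform-independent. A minimal sketch of the mismatch, assuming NumPy 1.x behaviour; the variable names are hypothetical:

import numpy as np

seq_len = 5                                       # hypothetical sequence length
default_pids = np.arange(seq_len)                 # int32 on Windows, int64 on most Linux builds
pinned_pids = np.arange(seq_len, dtype=np.int64)  # int64 on every platform

print(default_pids.dtype)  # platform dependent
print(pinned_pids.dtype)   # always int64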
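
Note on the reordered type check in trainer.py (illustrative commentary, not part of the patch): issubclass() raises TypeError when its first argument is not a class, so probing a plain model_fn function with issubclass(model_class_or_model_fn, Model) before inspect.isfunction would fail; checking isfunction first accepts both a function and a Model subclass. A small sketch of that ordering, with a hypothetical helper name and model_fn signature:

import inspect

class Model:                                    # stand-in for propeller's Model base class
    pass

def resolve_model_fn(model_class_or_model_fn):
    # Mirrors the patched ordering: functions first, then Model subclasses.
    if inspect.isfunction(model_class_or_model_fn):
        return model_class_or_model_fn
    elif issubclass(model_class_or_model_fn, Model):
        return model_class_or_model_fn          # propeller wraps this case via _build_model_fn
    else:
        raise ValueError('unknown model %s' % model_class_or_model_fn)

def my_model_fn(features, mode, params, run_config):   # hypothetical model_fn
    return None

resolve_model_fn(my_model_fn)  # ok; checking issubclass first raised TypeError here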