diff --git a/demo/ernie-classification/run_question_matching.sh b/demo/ernie-classification/run_question_matching.sh index 4780434c986550158308df8c81c5644f9b8af48c..2230d8e0b713fc285f083dd3fb26d08a98d744df 100644 --- a/demo/ernie-classification/run_question_matching.sh +++ b/demo/ernie-classification/run_question_matching.sh @@ -1,9 +1,9 @@ export CUDA_VISIBLE_DEVICES=0 CKPT_DIR="./ckpt_question_matching" -python -u sentiment_cls.py \ +python -u question_matching.py \ --batch_size 32 \ - --weight_decay 0.00 \ + --weight_decay 0.0 \ --checkpoint_dir $CKPT_DIR \ --num_epoch 3 \ --max_seq_len 128 \ diff --git a/paddlehub/dataset/__init__.py b/paddlehub/dataset/__init__.py index a81791257e7a80ad2f075d52541d5070c7823fa8..1cb0086c10d4e0f7ded663d90695cd347712bbce 100644 --- a/paddlehub/dataset/__init__.py +++ b/paddlehub/dataset/__init__.py @@ -12,9 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +# NLP Dataset from .dataset import InputExample, HubDataset from .chnsenticorp import ChnSentiCorp from .msra_ner import MSRA_NER from .nlpcc_dbqa import NLPCC_DBQA +from .lcqmc import LCQMC + +# CV Dataset from .dogcat import DogCatDataset as DogCat from .flowers import FlowersDataset as Flowers diff --git a/paddlehub/dataset/lcqmc.py b/paddlehub/dataset/lcqmc.py index 75d027a1c642ec136bfa268896106b1f40b7b820..3a17733e3eacdb9852debc4db24788ff87482e3d 100644 --- a/paddlehub/dataset/lcqmc.py +++ b/paddlehub/dataset/lcqmc.py @@ -24,7 +24,7 @@ from paddlehub.common.logger import logger DATA_URL = "https://paddlehub-dataset.bj.bcebos.com/lcqmc.tar.gz" -class NLPCC_DBQA(HubDataset): +class LCQMC(HubDataset): def __init__(self): self.dataset_dir = os.path.join(DATA_HOME, "lcqmc") if not os.path.exists(self.dataset_dir): @@ -79,6 +79,6 @@ class NLPCC_DBQA(HubDataset): if __name__ == "__main__": - ds = NLPCC_DBQA() + ds = LCQMC() for e in ds.get_train_examples(): print("{}\t{}\t{}\t{}".format(e.guid, e.text_a, e.text_b, e.label)) diff --git a/paddlehub/module/checker.py b/paddlehub/module/checker.py index d8ecb4ff7e7f2d41bd8f2bb8fa57e9efd8f9b610..ad5a50529df24887e44591968c22d8ed74c7363b 100644 --- a/paddlehub/module/checker.py +++ b/paddlehub/module/checker.py @@ -154,16 +154,16 @@ class ModuleChecker: if not os.path.exists(file_path): if file_info.is_need: logger.error( - "module lack of necessary file [%s]" % file_path) + "Module incompleted! Missing file [%s]" % file_path) return False else: if file_type == check_info_pb2.FILE: if not os.path.isfile(file_path): - logger.error("file type error %s" % file_path) + logger.error("File type error %s" % file_path) return False if file_type == check_info_pb2.DIR: if not os.path.isdir(file_path): - logger.error("file type error %s" % file_path) + logger.error("File type error %s" % file_path) return False return True diff --git a/paddlehub/module/manager.py b/paddlehub/module/manager.py index 98bfa11835dc97a28fb622d0f72466c81b6b2971..59edcbd3330bf974fd2dc167ecd8257f0910d7f5 100644 --- a/paddlehub/module/manager.py +++ b/paddlehub/module/manager.py @@ -61,13 +61,14 @@ class LocalModuleManager: self.all_modules(update=True) if module_name in self.modules_dict: module_dir = self.modules_dict[module_name] - tips = "module %s already install in %s" % (module_name, module_dir) + tips = "Module %s already installed in %s" % (module_name, + module_dir) return True, tips, module_dir url = hub.default_hub_server.get_module_url( module_name, version=module_version) #TODO(wuzewu): add compatibility check if not url: - tips = "can't found module %s" % module_name + tips = "Can't find module %s" % module_name if module_version: tips += " with version %s" % module_version return False, tips, None diff --git a/paddlehub/module/module.py b/paddlehub/module/module.py index a9fbbb253810874ae6e594eb9640d7ba201e376c..8fb644c217296a9512963050b0d0acf39fb3b4d2 100644 --- a/paddlehub/module/module.py +++ b/paddlehub/module/module.py @@ -128,7 +128,7 @@ class Module(object): self._generate_module_info(module_info) self._init_with_signature(signatures=signatures) else: - raise "Error! HubModule can't init with nothing" + raise ValueError("Error! Module initialized parameter is empty") def _init_with_name(self, name): logger.info("Try installing module %s" % name) @@ -191,7 +191,8 @@ class Module(object): def _init_with_module_file(self, module_dir): checker = ModuleChecker(module_dir) if not checker.check(): - logger.error("Module init failed on {}".format(module_dir)) + logger.error( + "Module initialization failed on {}".format(module_dir)) exit(1) self.helper = ModuleHelper(module_dir) @@ -223,7 +224,9 @@ class Module(object): self.program = signatures[0].inputs[0].block.program for sign in signatures: if sign.name in self.signatures: - raise "Error! signature array contains repeat signatrue %s" % sign + raise ValueError( + "Error! Signature array contains duplicated signatrues %s" % + sign) if self.default_signature is None and sign.for_predict: self.default_signature = sign self.signatures[sign.name] = sign @@ -265,7 +268,7 @@ class Module(object): self.module_info = {} else: if not utils.is_yaml_file(module_info): - logger.critical("module info file should in yaml format") + logger.critical("Module info file should be yaml format") exit(1) self.module_info = yaml_parser.parse(module_info) self.author = self.module_info.get('author', 'UNKNOWN') @@ -532,7 +535,7 @@ class Module(object): return self.get_name_prefix() + var_name def _check_signatures(self): - assert self.signatures, "signature array should not be None" + assert self.signatures, "Signature array should not be None" for key, sign in self.signatures.items(): assert isinstance(sign, diff --git a/paddlehub/module/signature.py b/paddlehub/module/signature.py index 11d5bdea3b6466131b27ceab2f467d342ebd3251..243dc5dc3b465dfe25b16a9a508d97dbda9a42b0 100644 --- a/paddlehub/module/signature.py +++ b/paddlehub/module/signature.py @@ -47,13 +47,11 @@ class Signature: self.name = name for item in inputs: assert isinstance( - item, - Variable), "the item of inputs list shoule be paddle Variable" + item, Variable), "the item of inputs list shoule be Variable" for item in outputs: assert isinstance( - item, - Variable), "the item of outputs list shoule be paddle Variable" + item, Variable), "the item of outputs list shoule be Variable" self.inputs = inputs self.outputs = outputs diff --git a/paddlehub/version.py b/paddlehub/version.py index 61c98cc21c95e4098a5aee992863bdc72f91b0be..d284329b957269fa35016ababe9098eccfaab4f2 100644 --- a/paddlehub/version.py +++ b/paddlehub/version.py @@ -12,5 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. """ PaddleHub version string """ -hub_version = "0.3.1.alpha" -module_proto_version = "0.1.0" +hub_version = "0.4.0.alpha" +module_proto_version = "1.0.0"