未验证 提交 4bdad22b 编写于 作者: Z Zeyu Chen 提交者: GitHub

Update v1.7 document

update version 
...@@ -18,7 +18,7 @@ parser.add_argument( ...@@ -18,7 +18,7 @@ parser.add_argument(
default="mobilenet", default="mobilenet",
help="Module used as feature extractor.") help="Module used as feature extractor.")
# the names of the hyperparameters to be searched should match those in hparam.py # the names of the hyper-parameters to be searched should match those in hparam.py
parser.add_argument( parser.add_argument(
"--batch_size", "--batch_size",
type=int, type=int,
...@@ -27,7 +27,7 @@ parser.add_argument( ...@@ -27,7 +27,7 @@ parser.add_argument(
parser.add_argument( parser.add_argument(
"--learning_rate", type=float, default=1e-4, help="learning_rate.") "--learning_rate", type=float, default=1e-4, help="learning_rate.")
# saved_params_dir and model_path are needed by auto finetune # saved_params_dir and model_path are needed by auto fine-tune
parser.add_argument( parser.add_argument(
"--saved_params_dir", "--saved_params_dir",
type=str, type=str,
...@@ -76,7 +76,7 @@ def finetune(args): ...@@ -76,7 +76,7 @@ def finetune(args):
img = input_dict["image"] img = input_dict["image"]
feed_list = [img.name] feed_list = [img.name]
# Select finetune strategy, setup config and finetune # Select fine-tune strategy, setup config and fine-tune
strategy = hub.DefaultFinetuneStrategy(learning_rate=args.learning_rate) strategy = hub.DefaultFinetuneStrategy(learning_rate=args.learning_rate)
config = hub.RunConfig( config = hub.RunConfig(
use_cuda=True, use_cuda=True,
...@@ -100,7 +100,7 @@ def finetune(args): ...@@ -100,7 +100,7 @@ def finetune(args):
task.load_parameters(args.model_path) task.load_parameters(args.model_path)
logger.info("PaddleHub has loaded model from %s" % args.model_path) logger.info("PaddleHub has loaded model from %s" % args.model_path)
# Finetune by PaddleHub's API # Fine-tune by PaddleHub's API
task.finetune() task.finetune()
# Evaluate by PaddleHub's API # Evaluate by PaddleHub's API
run_states = task.eval() run_states = task.eval()
...@@ -114,7 +114,7 @@ def finetune(args): ...@@ -114,7 +114,7 @@ def finetune(args):
shutil.copytree(best_model_dir, args.saved_params_dir) shutil.copytree(best_model_dir, args.saved_params_dir)
shutil.rmtree(config.checkpoint_dir) shutil.rmtree(config.checkpoint_dir)
# acc on dev will be used by auto finetune # acc on dev will be used by auto fine-tune
hub.report_final_result(eval_avg_score["acc"]) hub.report_final_result(eval_avg_score["acc"])
......
...@@ -13,7 +13,7 @@ from paddlehub.common.logger import logger ...@@ -13,7 +13,7 @@ from paddlehub.common.logger import logger
parser = argparse.ArgumentParser(__doc__) parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--epochs", type=int, default=3, help="epochs.") parser.add_argument("--epochs", type=int, default=3, help="epochs.")
# the names of the hyperparameters to be searched should match those in hparam.py # the names of the hyper-parameters to be searched should match those in hparam.py
parser.add_argument("--batch_size", type=int, default=32, help="batch_size.") parser.add_argument("--batch_size", type=int, default=32, help="batch_size.")
parser.add_argument( parser.add_argument(
"--learning_rate", type=float, default=5e-5, help="learning_rate.") "--learning_rate", type=float, default=5e-5, help="learning_rate.")
...@@ -33,7 +33,7 @@ parser.add_argument( ...@@ -33,7 +33,7 @@ parser.add_argument(
default=None, default=None,
help="Directory to model checkpoint") help="Directory to model checkpoint")
# saved_params_dir and model_path are needed by auto finetune # saved_params_dir and model_path are needed by auto fine-tune
parser.add_argument( parser.add_argument(
"--saved_params_dir", "--saved_params_dir",
type=str, type=str,
...@@ -82,14 +82,14 @@ if __name__ == '__main__': ...@@ -82,14 +82,14 @@ if __name__ == '__main__':
inputs["input_mask"].name, inputs["input_mask"].name,
] ]
# Select finetune strategy, setup config and finetune # Select fine-tune strategy, setup config and fine-tune
strategy = hub.AdamWeightDecayStrategy( strategy = hub.AdamWeightDecayStrategy(
warmup_proportion=args.warmup_prop, warmup_proportion=args.warmup_prop,
learning_rate=args.learning_rate, learning_rate=args.learning_rate,
weight_decay=args.weight_decay, weight_decay=args.weight_decay,
lr_scheduler="linear_decay") lr_scheduler="linear_decay")
# Setup runing config for PaddleHub Finetune API # Setup RunConfig for PaddleHub Fine-tune API
config = hub.RunConfig( config = hub.RunConfig(
checkpoint_dir=args.checkpoint_dir, checkpoint_dir=args.checkpoint_dir,
use_cuda=True, use_cuda=True,
...@@ -98,7 +98,7 @@ if __name__ == '__main__': ...@@ -98,7 +98,7 @@ if __name__ == '__main__':
enable_memory_optim=True, enable_memory_optim=True,
strategy=strategy) strategy=strategy)
# Define a classification finetune task by PaddleHub's API # Define a classification fine-tune task by PaddleHub's API
cls_task = hub.TextClassifierTask( cls_task = hub.TextClassifierTask(
data_reader=reader, data_reader=reader,
feature=pooled_output, feature=pooled_output,
...@@ -125,5 +125,5 @@ if __name__ == '__main__': ...@@ -125,5 +125,5 @@ if __name__ == '__main__':
shutil.copytree(best_model_dir, args.saved_params_dir) shutil.copytree(best_model_dir, args.saved_params_dir)
shutil.rmtree(config.checkpoint_dir) shutil.rmtree(config.checkpoint_dir)
# acc on dev will be used by auto finetune # acc on dev will be used by auto fine-tune
hub.report_final_result(eval_avg_score["acc"]) hub.report_final_result(eval_avg_score["acc"])
...@@ -14,7 +14,7 @@ parser.add_argument("--use_gpu", type=ast.literal_eval, default=True ...@@ -14,7 +14,7 @@ parser.add_argument("--use_gpu", type=ast.literal_eval, default=True
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.") parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.")
parser.add_argument("--batch_size", type=int, default=16, help="Total examples' number in batch for training.") parser.add_argument("--batch_size", type=int, default=16, help="Total examples' number in batch for training.")
parser.add_argument("--module", type=str, default="resnet50", help="Module used as feature extractor.") parser.add_argument("--module", type=str, default="resnet50", help="Module used as feature extractor.")
parser.add_argument("--dataset", type=str, default="flowers", help="Dataset to finetune.") parser.add_argument("--dataset", type=str, default="flowers", help="Dataset to fine-tune.")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=True, help="Whether use data parallel.") parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=True, help="Whether use data parallel.")
# yapf: enable. # yapf: enable.
...@@ -60,7 +60,7 @@ def finetune(args): ...@@ -60,7 +60,7 @@ def finetune(args):
# Setup feed list for data feeder # Setup feed list for data feeder
feed_list = [input_dict["image"].name] feed_list = [input_dict["image"].name]
# Setup runing config for PaddleHub Finetune API # Setup RunConfig for PaddleHub Fine-tune API
config = hub.RunConfig( config = hub.RunConfig(
use_data_parallel=args.use_data_parallel, use_data_parallel=args.use_data_parallel,
use_cuda=args.use_gpu, use_cuda=args.use_gpu,
...@@ -69,7 +69,7 @@ def finetune(args): ...@@ -69,7 +69,7 @@ def finetune(args):
checkpoint_dir=args.checkpoint_dir, checkpoint_dir=args.checkpoint_dir,
strategy=hub.finetune.strategy.DefaultFinetuneStrategy()) strategy=hub.finetune.strategy.DefaultFinetuneStrategy())
# Define a reading comprehension finetune task by PaddleHub's API # Define an image classification task by PaddleHub Fine-tune API
task = hub.ImageClassifierTask( task = hub.ImageClassifierTask(
data_reader=data_reader, data_reader=data_reader,
feed_list=feed_list, feed_list=feed_list,
...@@ -77,7 +77,7 @@ def finetune(args): ...@@ -77,7 +77,7 @@ def finetune(args):
num_classes=dataset.num_labels, num_classes=dataset.num_labels,
config=config) config=config)
# Finetune by PaddleHub's API # Fine-tune by PaddleHub's API
task.finetune_and_eval() task.finetune_and_eval()
......
...@@ -13,7 +13,7 @@ parser.add_argument("--use_gpu", type=ast.literal_eval, default=True ...@@ -13,7 +13,7 @@ parser.add_argument("--use_gpu", type=ast.literal_eval, default=True
parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.") parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.")
parser.add_argument("--batch_size", type=int, default=16, help="Total examples' number in batch for training.") parser.add_argument("--batch_size", type=int, default=16, help="Total examples' number in batch for training.")
parser.add_argument("--module", type=str, default="resnet50", help="Module used as a feature extractor.") parser.add_argument("--module", type=str, default="resnet50", help="Module used as a feature extractor.")
parser.add_argument("--dataset", type=str, default="flowers", help="Dataset to finetune.") parser.add_argument("--dataset", type=str, default="flowers", help="Dataset to fine-tune.")
# yapf: enable. # yapf: enable.
module_map = { module_map = {
...@@ -58,7 +58,7 @@ def predict(args): ...@@ -58,7 +58,7 @@ def predict(args):
# Setup feed list for data feeder # Setup feed list for data feeder
feed_list = [input_dict["image"].name] feed_list = [input_dict["image"].name]
# Setup runing config for PaddleHub Finetune API # Setup RunConfig for PaddleHub Fine-tune API
config = hub.RunConfig( config = hub.RunConfig(
use_data_parallel=False, use_data_parallel=False,
use_cuda=args.use_gpu, use_cuda=args.use_gpu,
...@@ -66,7 +66,7 @@ def predict(args): ...@@ -66,7 +66,7 @@ def predict(args):
checkpoint_dir=args.checkpoint_dir, checkpoint_dir=args.checkpoint_dir,
strategy=hub.finetune.strategy.DefaultFinetuneStrategy()) strategy=hub.finetune.strategy.DefaultFinetuneStrategy())
# Define a reading comprehension finetune task by PaddleHub's API # Define an image classification task by PaddleHub Fine-tune API
task = hub.ImageClassifierTask( task = hub.ImageClassifierTask(
data_reader=data_reader, data_reader=data_reader,
feed_list=feed_list, feed_list=feed_list,
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Finetuning on classification task """ """Fine-tuning on classification task """
import argparse import argparse
import ast import ast
...@@ -23,7 +23,7 @@ import paddlehub as hub ...@@ -23,7 +23,7 @@ import paddlehub as hub
# yapf: disable # yapf: disable
parser = argparse.ArgumentParser(__doc__) parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.") parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for finetuning, input should be True or False") parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for fine-tuning, input should be True or False")
parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.") parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.") parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.")
parser.add_argument("--warmup_proportion", type=float, default=0.1, help="Warmup proportion params for warmup strategy") parser.add_argument("--warmup_proportion", type=float, default=0.1, help="Warmup proportion params for warmup strategy")
...@@ -56,13 +56,13 @@ if __name__ == '__main__': ...@@ -56,13 +56,13 @@ if __name__ == '__main__':
# Use "pooled_output" for classification tasks on an entire sentence. # Use "pooled_output" for classification tasks on an entire sentence.
pooled_output = outputs["pooled_output"] pooled_output = outputs["pooled_output"]
# Select finetune strategy, setup config and finetune # Select fine-tune strategy, setup config and fine-tune
strategy = hub.AdamWeightDecayStrategy( strategy = hub.AdamWeightDecayStrategy(
warmup_proportion=args.warmup_proportion, warmup_proportion=args.warmup_proportion,
weight_decay=args.weight_decay, weight_decay=args.weight_decay,
learning_rate=args.learning_rate) learning_rate=args.learning_rate)
# Setup runing config for PaddleHub Finetune API # Setup RunConfig for PaddleHub Fine-tune API
config = hub.RunConfig( config = hub.RunConfig(
use_cuda=args.use_gpu, use_cuda=args.use_gpu,
num_epoch=args.num_epoch, num_epoch=args.num_epoch,
...@@ -70,7 +70,7 @@ if __name__ == '__main__': ...@@ -70,7 +70,7 @@ if __name__ == '__main__':
checkpoint_dir=args.checkpoint_dir, checkpoint_dir=args.checkpoint_dir,
strategy=strategy) strategy=strategy)
# Define a classification finetune task by PaddleHub's API # Define a classification fine-tune task by PaddleHub's API
multi_label_cls_task = hub.MultiLabelClassifierTask( multi_label_cls_task = hub.MultiLabelClassifierTask(
data_reader=reader, data_reader=reader,
feature=pooled_output, feature=pooled_output,
...@@ -78,6 +78,6 @@ if __name__ == '__main__': ...@@ -78,6 +78,6 @@ if __name__ == '__main__':
num_classes=dataset.num_labels, num_classes=dataset.num_labels,
config=config) config=config)
# Finetune and evaluate by PaddleHub's API # Fine-tune and evaluate by PaddleHub's API
# will finish training, evaluation, testing, save model automatically # will finish training, evaluation, testing, save model automatically
multi_label_cls_task.finetune_and_eval() multi_label_cls_task.finetune_and_eval()
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Finetuning on classification task """ """Fine-tuning on classification task """
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
...@@ -35,7 +35,7 @@ parser = argparse.ArgumentParser(__doc__) ...@@ -35,7 +35,7 @@ parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint") parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.") parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.")
parser.add_argument("--max_seq_len", type=int, default=128, help="Number of words of the longest seqence.") parser.add_argument("--max_seq_len", type=int, default=128, help="Number of words of the longest seqence.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for finetuning, input should be True or False") parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for fine-tuning, input should be True or False")
args = parser.parse_args() args = parser.parse_args()
# yapf: enable. # yapf: enable.
...@@ -65,7 +65,7 @@ if __name__ == '__main__': ...@@ -65,7 +65,7 @@ if __name__ == '__main__':
# Use "sequence_output" for token-level output. # Use "sequence_output" for token-level output.
pooled_output = outputs["pooled_output"] pooled_output = outputs["pooled_output"]
# Setup runing config for PaddleHub Finetune API # Setup RunConfig for PaddleHub Fine-tune API
config = hub.RunConfig( config = hub.RunConfig(
use_data_parallel=False, use_data_parallel=False,
use_cuda=args.use_gpu, use_cuda=args.use_gpu,
...@@ -73,7 +73,7 @@ if __name__ == '__main__': ...@@ -73,7 +73,7 @@ if __name__ == '__main__':
checkpoint_dir=args.checkpoint_dir, checkpoint_dir=args.checkpoint_dir,
strategy=hub.finetune.strategy.DefaultFinetuneStrategy()) strategy=hub.finetune.strategy.DefaultFinetuneStrategy())
# Define a classification finetune task by PaddleHub's API # Define a classification fine-tune task by PaddleHub's API
multi_label_cls_task = hub.MultiLabelClassifierTask( multi_label_cls_task = hub.MultiLabelClassifierTask(
data_reader=reader, data_reader=reader,
feature=pooled_output, feature=pooled_output,
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Finetuning on classification task """ """Fine-tuning on classification task """
import argparse import argparse
import ast import ast
...@@ -23,7 +23,7 @@ import paddlehub as hub ...@@ -23,7 +23,7 @@ import paddlehub as hub
# yapf: disable # yapf: disable
parser = argparse.ArgumentParser(__doc__) parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.") parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for finetuning, input should be True or False") parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for fine-tuning, input should be True or False")
parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.") parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.") parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.")
parser.add_argument("--warmup_proportion", type=float, default=0.0, help="Warmup proportion params for warmup strategy") parser.add_argument("--warmup_proportion", type=float, default=0.0, help="Warmup proportion params for warmup strategy")
...@@ -61,13 +61,13 @@ if __name__ == '__main__': ...@@ -61,13 +61,13 @@ if __name__ == '__main__':
inputs["input_mask"].name, inputs["input_mask"].name,
] ]
# Select finetune strategy, setup config and finetune # Select fine-tune strategy, setup config and fine-tune
strategy = hub.AdamWeightDecayStrategy( strategy = hub.AdamWeightDecayStrategy(
warmup_proportion=args.warmup_proportion, warmup_proportion=args.warmup_proportion,
weight_decay=args.weight_decay, weight_decay=args.weight_decay,
learning_rate=args.learning_rate) learning_rate=args.learning_rate)
# Setup runing config for PaddleHub Finetune API # Setup RunConfig for PaddleHub Fine-tune API
config = hub.RunConfig( config = hub.RunConfig(
use_data_parallel=args.use_data_parallel, use_data_parallel=args.use_data_parallel,
use_cuda=args.use_gpu, use_cuda=args.use_gpu,
...@@ -76,7 +76,7 @@ if __name__ == '__main__': ...@@ -76,7 +76,7 @@ if __name__ == '__main__':
checkpoint_dir=args.checkpoint_dir, checkpoint_dir=args.checkpoint_dir,
strategy=strategy) strategy=strategy)
# Define a classification finetune task by PaddleHub's API # Define a classification fine-tune task by PaddleHub's API
cls_task = hub.TextClassifierTask( cls_task = hub.TextClassifierTask(
data_reader=reader, data_reader=reader,
feature=pooled_output, feature=pooled_output,
...@@ -84,6 +84,6 @@ if __name__ == '__main__': ...@@ -84,6 +84,6 @@ if __name__ == '__main__':
num_classes=dataset.num_labels, num_classes=dataset.num_labels,
config=config) config=config)
# Finetune and evaluate by PaddleHub's API # Fine-tune and evaluate by PaddleHub's API
# will finish training, evaluation, testing, save model automatically # will finish training, evaluation, testing, save model automatically
cls_task.finetune_and_eval() cls_task.finetune_and_eval()
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Finetuning on classification task """ """Fine-tuning on classification task """
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
...@@ -33,7 +33,7 @@ parser = argparse.ArgumentParser(__doc__) ...@@ -33,7 +33,7 @@ parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint") parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.") parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.")
parser.add_argument("--max_seq_len", type=int, default=128, help="Number of words of the longest seqence.") parser.add_argument("--max_seq_len", type=int, default=128, help="Number of words of the longest seqence.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for finetuning, input should be True or False") parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for fine-tuning, input should be True or False")
args = parser.parse_args() args = parser.parse_args()
# yapf: enable. # yapf: enable.
...@@ -63,7 +63,7 @@ if __name__ == '__main__': ...@@ -63,7 +63,7 @@ if __name__ == '__main__':
inputs["input_mask"].name, inputs["input_mask"].name,
] ]
# Setup runing config for PaddleHub Finetune API # Setup RunConfig for PaddleHub Fine-tune API
config = hub.RunConfig( config = hub.RunConfig(
use_data_parallel=False, use_data_parallel=False,
use_cuda=args.use_gpu, use_cuda=args.use_gpu,
...@@ -71,7 +71,7 @@ if __name__ == '__main__': ...@@ -71,7 +71,7 @@ if __name__ == '__main__':
checkpoint_dir=args.checkpoint_dir, checkpoint_dir=args.checkpoint_dir,
strategy=hub.finetune.strategy.DefaultFinetuneStrategy()) strategy=hub.finetune.strategy.DefaultFinetuneStrategy())
# Define a classification finetune task by PaddleHub's API # Define a classification fine-tune task by PaddleHub's API
cls_task = hub.TextClassifierTask( cls_task = hub.TextClassifierTask(
data_reader=reader, data_reader=reader,
feature=pooled_output, feature=pooled_output,
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Finetuning on classification task """ """Fine-tuning on classification task """
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
...@@ -28,7 +28,7 @@ hub.common.logger.logger.setLevel("INFO") ...@@ -28,7 +28,7 @@ hub.common.logger.logger.setLevel("INFO")
# yapf: disable # yapf: disable
parser = argparse.ArgumentParser(__doc__) parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=1, help="Number of epoches for fine-tuning.") parser.add_argument("--num_epoch", type=int, default=1, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for finetuning, input should be True or False") parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for fine-tuning, input should be True or False")
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint.") parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint.")
parser.add_argument("--max_seq_len", type=int, default=384, help="Number of words of the longest seqence.") parser.add_argument("--max_seq_len", type=int, default=384, help="Number of words of the longest seqence.")
parser.add_argument("--batch_size", type=int, default=8, help="Total examples' number in batch for training.") parser.add_argument("--batch_size", type=int, default=8, help="Total examples' number in batch for training.")
...@@ -64,7 +64,7 @@ if __name__ == '__main__': ...@@ -64,7 +64,7 @@ if __name__ == '__main__':
inputs["input_mask"].name, inputs["input_mask"].name,
] ]
# Setup runing config for PaddleHub Finetune API # Setup RunConfig for PaddleHub Fine-tune API
config = hub.RunConfig( config = hub.RunConfig(
use_data_parallel=False, use_data_parallel=False,
use_cuda=args.use_gpu, use_cuda=args.use_gpu,
...@@ -72,7 +72,7 @@ if __name__ == '__main__': ...@@ -72,7 +72,7 @@ if __name__ == '__main__':
checkpoint_dir=args.checkpoint_dir, checkpoint_dir=args.checkpoint_dir,
strategy=hub.AdamWeightDecayStrategy()) strategy=hub.AdamWeightDecayStrategy())
# Define a reading comprehension finetune task by PaddleHub's API # Define a reading comprehension fine-tune task by PaddleHub's API
reading_comprehension_task = hub.ReadingComprehensionTask( reading_comprehension_task = hub.ReadingComprehensionTask(
data_reader=reader, data_reader=reader,
feature=seq_output, feature=seq_output,
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Finetuning on classification task """ """Fine-tuning on classification task """
import argparse import argparse
import ast import ast
...@@ -25,7 +25,7 @@ hub.common.logger.logger.setLevel("INFO") ...@@ -25,7 +25,7 @@ hub.common.logger.logger.setLevel("INFO")
# yapf: disable # yapf: disable
parser = argparse.ArgumentParser(__doc__) parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=1, help="Number of epoches for fine-tuning.") parser.add_argument("--num_epoch", type=int, default=1, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for finetuning, input should be True or False") parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for fine-tuning, input should be True or False")
parser.add_argument("--learning_rate", type=float, default=3e-5, help="Learning rate used to train with warmup.") parser.add_argument("--learning_rate", type=float, default=3e-5, help="Learning rate used to train with warmup.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.") parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.")
parser.add_argument("--warmup_proportion", type=float, default=0.0, help="Warmup proportion params for warmup strategy") parser.add_argument("--warmup_proportion", type=float, default=0.0, help="Warmup proportion params for warmup strategy")
...@@ -64,13 +64,13 @@ if __name__ == '__main__': ...@@ -64,13 +64,13 @@ if __name__ == '__main__':
inputs["input_mask"].name, inputs["input_mask"].name,
] ]
# Select finetune strategy, setup config and finetune # Select fine-tune strategy, setup config and fine-tune
strategy = hub.AdamWeightDecayStrategy( strategy = hub.AdamWeightDecayStrategy(
weight_decay=args.weight_decay, weight_decay=args.weight_decay,
learning_rate=args.learning_rate, learning_rate=args.learning_rate,
warmup_proportion=args.warmup_proportion) warmup_proportion=args.warmup_proportion)
# Setup runing config for PaddleHub Finetune API # Setup RunConfig for PaddleHub Fine-tune API
config = hub.RunConfig( config = hub.RunConfig(
eval_interval=300, eval_interval=300,
use_data_parallel=args.use_data_parallel, use_data_parallel=args.use_data_parallel,
...@@ -80,7 +80,7 @@ if __name__ == '__main__': ...@@ -80,7 +80,7 @@ if __name__ == '__main__':
checkpoint_dir=args.checkpoint_dir, checkpoint_dir=args.checkpoint_dir,
strategy=strategy) strategy=strategy)
# Define a reading comprehension finetune task by PaddleHub's API # Define a reading comprehension fine-tune task by PaddleHub's API
reading_comprehension_task = hub.ReadingComprehensionTask( reading_comprehension_task = hub.ReadingComprehensionTask(
data_reader=reader, data_reader=reader,
feature=seq_output, feature=seq_output,
...@@ -89,5 +89,5 @@ if __name__ == '__main__': ...@@ -89,5 +89,5 @@ if __name__ == '__main__':
sub_task="squad", sub_task="squad",
) )
# Finetune by PaddleHub's API # Fine-tune by PaddleHub's API
reading_comprehension_task.finetune_and_eval() reading_comprehension_task.finetune_and_eval()
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Finetuning on classification task """ """Fine-tuning on classification task """
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
...@@ -33,7 +33,7 @@ parser = argparse.ArgumentParser(__doc__) ...@@ -33,7 +33,7 @@ parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint") parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.") parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.") parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for finetuning, input should be True or False") parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for fine-tuning, input should be True or False")
args = parser.parse_args() args = parser.parse_args()
# yapf: enable. # yapf: enable.
...@@ -64,7 +64,7 @@ if __name__ == '__main__': ...@@ -64,7 +64,7 @@ if __name__ == '__main__':
inputs["input_mask"].name, inputs["input_mask"].name,
] ]
# Setup runing config for PaddleHub Finetune API # Setup RunConfig for PaddleHub Fine-tune API
config = hub.RunConfig( config = hub.RunConfig(
use_data_parallel=False, use_data_parallel=False,
use_cuda=args.use_gpu, use_cuda=args.use_gpu,
...@@ -72,7 +72,7 @@ if __name__ == '__main__': ...@@ -72,7 +72,7 @@ if __name__ == '__main__':
checkpoint_dir=args.checkpoint_dir, checkpoint_dir=args.checkpoint_dir,
strategy=hub.AdamWeightDecayStrategy()) strategy=hub.AdamWeightDecayStrategy())
# Define a regression finetune task by PaddleHub's API # Define a regression fine-tune task by PaddleHub's API
reg_task = hub.RegressionTask( reg_task = hub.RegressionTask(
data_reader=reader, data_reader=reader,
feature=pooled_output, feature=pooled_output,
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Finetuning on classification task """ """Fine-tuning on classification task """
import argparse import argparse
import ast import ast
...@@ -23,7 +23,7 @@ import paddlehub as hub ...@@ -23,7 +23,7 @@ import paddlehub as hub
# yapf: disable # yapf: disable
parser = argparse.ArgumentParser(__doc__) parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.") parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for finetuning, input should be True or False") parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for fine-tuning, input should be True or False")
parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.") parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.") parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.")
parser.add_argument("--warmup_proportion", type=float, default=0.1, help="Warmup proportion params for warmup strategy") parser.add_argument("--warmup_proportion", type=float, default=0.1, help="Warmup proportion params for warmup strategy")
...@@ -62,13 +62,13 @@ if __name__ == '__main__': ...@@ -62,13 +62,13 @@ if __name__ == '__main__':
inputs["input_mask"].name, inputs["input_mask"].name,
] ]
# Select finetune strategy, setup config and finetune # Select fine-tune strategy, setup config and fine-tune
strategy = hub.AdamWeightDecayStrategy( strategy = hub.AdamWeightDecayStrategy(
warmup_proportion=args.warmup_proportion, warmup_proportion=args.warmup_proportion,
weight_decay=args.weight_decay, weight_decay=args.weight_decay,
learning_rate=args.learning_rate) learning_rate=args.learning_rate)
# Setup runing config for PaddleHub Finetune API # Setup RunConfig for PaddleHub Fine-tune API
config = hub.RunConfig( config = hub.RunConfig(
eval_interval=300, eval_interval=300,
use_data_parallel=args.use_data_parallel, use_data_parallel=args.use_data_parallel,
...@@ -78,13 +78,13 @@ if __name__ == '__main__': ...@@ -78,13 +78,13 @@ if __name__ == '__main__':
checkpoint_dir=args.checkpoint_dir, checkpoint_dir=args.checkpoint_dir,
strategy=strategy) strategy=strategy)
# Define a regression finetune task by PaddleHub's API # Define a regression fine-tune task by PaddleHub's API
reg_task = hub.RegressionTask( reg_task = hub.RegressionTask(
data_reader=reader, data_reader=reader,
feature=pooled_output, feature=pooled_output,
feed_list=feed_list, feed_list=feed_list,
config=config) config=config)
# Finetune and evaluate by PaddleHub's API # Fine-tune and evaluate by PaddleHub's API
# will finish training, evaluation, testing, save model automatically # will finish training, evaluation, testing, save model automatically
reg_task.finetune_and_eval() reg_task.finetune_and_eval()
...@@ -16,7 +16,7 @@ import paddlehub as hub ...@@ -16,7 +16,7 @@ import paddlehub as hub
# yapf: disable # yapf: disable
parser = argparse.ArgumentParser(__doc__) parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint") parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for finetuning, input should be True or False") parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for fine-tuning, input should be True or False")
parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch when the program predicts.") parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch when the program predicts.")
args = parser.parse_args() args = parser.parse_args()
# yapf: enable. # yapf: enable.
...@@ -37,7 +37,7 @@ if __name__ == '__main__': ...@@ -37,7 +37,7 @@ if __name__ == '__main__':
# Must feed all the tensor of senta's module need # Must feed all the tensor of senta's module need
feed_list = [inputs["words"].name] feed_list = [inputs["words"].name]
# Setup runing config for PaddleHub Finetune API # Setup RunConfig for PaddleHub Fine-tune API
config = hub.RunConfig( config = hub.RunConfig(
use_data_parallel=False, use_data_parallel=False,
use_cuda=args.use_gpu, use_cuda=args.use_gpu,
...@@ -45,7 +45,7 @@ if __name__ == '__main__': ...@@ -45,7 +45,7 @@ if __name__ == '__main__':
checkpoint_dir=args.checkpoint_dir, checkpoint_dir=args.checkpoint_dir,
strategy=hub.AdamWeightDecayStrategy()) strategy=hub.AdamWeightDecayStrategy())
# Define a classfication finetune task by PaddleHub's API # Define a classfication fine-tune task by PaddleHub's API
cls_task = hub.TextClassifierTask( cls_task = hub.TextClassifierTask(
data_reader=reader, data_reader=reader,
feature=sent_feature, feature=sent_feature,
......
...@@ -8,7 +8,7 @@ import paddlehub as hub ...@@ -8,7 +8,7 @@ import paddlehub as hub
# yapf: disable # yapf: disable
parser = argparse.ArgumentParser(__doc__) parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.") parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for finetuning, input should be True or False") parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for fine-tuning, input should be True or False")
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint") parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--batch_size", type=int, default=32, help="Total examples' number in batch for training.") parser.add_argument("--batch_size", type=int, default=32, help="Total examples' number in batch for training.")
args = parser.parse_args() args = parser.parse_args()
...@@ -30,7 +30,7 @@ if __name__ == '__main__': ...@@ -30,7 +30,7 @@ if __name__ == '__main__':
# Must feed all the tensor of senta's module need # Must feed all the tensor of senta's module need
feed_list = [inputs["words"].name] feed_list = [inputs["words"].name]
# Setup runing config for PaddleHub Finetune API # Setup RunConfig for PaddleHub Fine-tune API
config = hub.RunConfig( config = hub.RunConfig(
use_cuda=args.use_gpu, use_cuda=args.use_gpu,
use_pyreader=False, use_pyreader=False,
...@@ -40,7 +40,7 @@ if __name__ == '__main__': ...@@ -40,7 +40,7 @@ if __name__ == '__main__':
checkpoint_dir=args.checkpoint_dir, checkpoint_dir=args.checkpoint_dir,
strategy=hub.AdamWeightDecayStrategy()) strategy=hub.AdamWeightDecayStrategy())
# Define a classfication finetune task by PaddleHub's API # Define a classfication fine-tune task by PaddleHub's API
cls_task = hub.TextClassifierTask( cls_task = hub.TextClassifierTask(
data_reader=reader, data_reader=reader,
feature=sent_feature, feature=sent_feature,
...@@ -48,6 +48,6 @@ if __name__ == '__main__': ...@@ -48,6 +48,6 @@ if __name__ == '__main__':
num_classes=dataset.num_labels, num_classes=dataset.num_labels,
config=config) config=config)
# Finetune and evaluate by PaddleHub's API # Fine-tune and evaluate by PaddleHub's API
# will finish training, evaluation, testing, save model automatically # will finish training, evaluation, testing, save model automatically
cls_task.finetune_and_eval() cls_task.finetune_and_eval()
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Finetuning on sequence labeling task """ """Fine-tuning on sequence labeling task """
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
...@@ -27,14 +27,13 @@ import time ...@@ -27,14 +27,13 @@ import time
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddlehub as hub import paddlehub as hub
from paddlehub.finetune.evaluate import chunk_eval, calculate_f1
# yapf: disable # yapf: disable
parser = argparse.ArgumentParser(__doc__) parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint") parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.") parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.") parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for finetuning, input should be True or False") parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for fine-tuning, input should be True or False")
args = parser.parse_args() args = parser.parse_args()
# yapf: enable. # yapf: enable.
...@@ -67,7 +66,7 @@ if __name__ == '__main__': ...@@ -67,7 +66,7 @@ if __name__ == '__main__':
inputs["input_mask"].name, inputs["input_mask"].name,
] ]
# Setup runing config for PaddleHub Finetune API # Setup RunConfig for PaddleHub Fine-tune API
config = hub.RunConfig( config = hub.RunConfig(
use_data_parallel=False, use_data_parallel=False,
use_cuda=args.use_gpu, use_cuda=args.use_gpu,
...@@ -75,7 +74,7 @@ if __name__ == '__main__': ...@@ -75,7 +74,7 @@ if __name__ == '__main__':
checkpoint_dir=args.checkpoint_dir, checkpoint_dir=args.checkpoint_dir,
strategy=hub.finetune.strategy.DefaultFinetuneStrategy()) strategy=hub.finetune.strategy.DefaultFinetuneStrategy())
# Define a sequence labeling finetune task by PaddleHub's API # Define a sequence labeling fine-tune task by PaddleHub's API
# if add crf, the network use crf as decoder # if add crf, the network use crf as decoder
seq_label_task = hub.SequenceLabelTask( seq_label_task = hub.SequenceLabelTask(
data_reader=reader, data_reader=reader,
...@@ -84,7 +83,7 @@ if __name__ == '__main__': ...@@ -84,7 +83,7 @@ if __name__ == '__main__':
max_seq_len=args.max_seq_len, max_seq_len=args.max_seq_len,
num_classes=dataset.num_labels, num_classes=dataset.num_labels,
config=config, config=config,
add_crf=True) add_crf=False)
# Data to be predicted # Data to be predicted
# If using python 2, prefix "u" is necessary # If using python 2, prefix "u" is necessary
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Finetuning on sequence labeling task.""" """Fine-tuning on sequence labeling task."""
import argparse import argparse
import ast import ast
...@@ -23,7 +23,7 @@ import paddlehub as hub ...@@ -23,7 +23,7 @@ import paddlehub as hub
# yapf: disable # yapf: disable
parser = argparse.ArgumentParser(__doc__) parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.") parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for finetuning, input should be True or False") parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for fine-tuning, input should be True or False")
parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.") parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.") parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.")
parser.add_argument("--warmup_proportion", type=float, default=0.1, help="Warmup proportion params for warmup strategy") parser.add_argument("--warmup_proportion", type=float, default=0.1, help="Warmup proportion params for warmup strategy")
...@@ -60,13 +60,13 @@ if __name__ == '__main__': ...@@ -60,13 +60,13 @@ if __name__ == '__main__':
inputs["segment_ids"].name, inputs["input_mask"].name inputs["segment_ids"].name, inputs["input_mask"].name
] ]
# Select a finetune strategy # Select a fine-tune strategy
strategy = hub.AdamWeightDecayStrategy( strategy = hub.AdamWeightDecayStrategy(
warmup_proportion=args.warmup_proportion, warmup_proportion=args.warmup_proportion,
weight_decay=args.weight_decay, weight_decay=args.weight_decay,
learning_rate=args.learning_rate) learning_rate=args.learning_rate)
# Setup runing config for PaddleHub Finetune API # Setup RunConfig for PaddleHub Fine-tune API
config = hub.RunConfig( config = hub.RunConfig(
use_data_parallel=args.use_data_parallel, use_data_parallel=args.use_data_parallel,
use_cuda=args.use_gpu, use_cuda=args.use_gpu,
...@@ -75,7 +75,7 @@ if __name__ == '__main__': ...@@ -75,7 +75,7 @@ if __name__ == '__main__':
checkpoint_dir=args.checkpoint_dir, checkpoint_dir=args.checkpoint_dir,
strategy=strategy) strategy=strategy)
# Define a sequence labeling finetune task by PaddleHub's API # Define a sequence labeling fine-tune task by PaddleHub's API
# If add crf, the network use crf as decoder # If add crf, the network use crf as decoder
seq_label_task = hub.SequenceLabelTask( seq_label_task = hub.SequenceLabelTask(
data_reader=reader, data_reader=reader,
...@@ -84,8 +84,8 @@ if __name__ == '__main__': ...@@ -84,8 +84,8 @@ if __name__ == '__main__':
max_seq_len=args.max_seq_len, max_seq_len=args.max_seq_len,
num_classes=dataset.num_labels, num_classes=dataset.num_labels,
config=config, config=config,
add_crf=True) add_crf=False)
# Finetune and evaluate model by PaddleHub's API # Fine-tune and evaluate model by PaddleHub's API
# will finish training, evaluation, testing, save model automatically # will finish training, evaluation, testing, save model automatically
seq_label_task.finetune_and_eval() seq_label_task.finetune_and_eval()
#coding:utf-8 #coding:utf-8
import os import os
import paddlehub as hub import paddlehub as hub
import cv2
if __name__ == "__main__": if __name__ == "__main__":
ssd = hub.Module(name="ssd_mobilenet_v1_pascal") ssd = hub.Module(name="ssd_mobilenet_v1_pascal")
test_img_path = os.path.join("test", "test_img_bird.jpg") test_img_path = os.path.join("test", "test_img_bird.jpg")
# get the input keys for signature 'object_detection'
data_format = ssd.processor.data_format(sign_name='object_detection')
key = list(data_format.keys())[0]
# set input dict
input_dict = {key: [test_img_path]}
# execute predict and print the result # execute predict and print the result
results = ssd.object_detection(data=input_dict) results = ssd.object_detection(images=[cv2.imread(test_img_path)])
for result in results: for result in results:
hub.logger.info(result) print(result)
...@@ -2,9 +2,29 @@ ...@@ -2,9 +2,29 @@
本示例将展示如何使用PaddleHub Fine-tune API以及Transformer类预训练模型(ERNIE/BERT/RoBERTa)完成分类任务。 本示例将展示如何使用PaddleHub Fine-tune API以及Transformer类预训练模型(ERNIE/BERT/RoBERTa)完成分类任务。
**PaddleHub 1.7.0以上版本支持在Transformer类预训练模型之后拼接预置网络(bow, bilstm, cnn, dpcnn, gru, lstm)完成文本分类任务**
## 目录结构
.
├── finetuned_model_to_module # PaddleHub Fine-tune得到模型如何转化为module,从而利用PaddleHub Serving部署
│   ├── __init__.py
│   └── module.py
├── predict_predefine_net.py # 加入预置网络预测脚本
├── predict.py # 不使用预置网络(使用fc网络)的预测脚本
├── README.md # 文本分类迁移学习文档说明
├── run_cls_predefine_net.sh # 加入预置网络的文本分类任务训练启动脚本
├── run_cls.sh # 不使用预置网络(使用fc网络)的训练启动脚本
├── run_predict_predefine_net.sh # 加入预置网络的预测启动脚本
├── run_predict.sh # 不使用预置网络(使用fc网络)的预测启动脚本
├── text_classifier_dygraph.py # 动态图训练脚本
├── text_cls_predefine_net.py # 加入预置网络训练脚本
└── text_cls.py # 不使用预置网络(使用fc网络)的训练脚本
## 如何开始Fine-tune ## 如何开始Fine-tune
在完成安装PaddlePaddle与PaddleHub后,通过执行脚本`sh run_classifier.sh`即可开始使用ERNIE对ChnSentiCorp数据集进行Fine-tune。 以下例子以不使用预置网络完成文本分类任务为例,说明PaddleHub如何完成迁移学习。使用预置网络完成文本分类任务的步骤与之类似。
在完成安装PaddlePaddle与PaddleHub后,通过执行脚本`sh run_cls.sh`即可开始使用ERNIE对ChnSentiCorp数据集进行Fine-tune。
其中脚本参数说明如下: 其中脚本参数说明如下:
...@@ -164,9 +184,26 @@ cls_task = hub.TextClassifierTask( ...@@ -164,9 +184,26 @@ cls_task = hub.TextClassifierTask(
cls_task.finetune_and_eval() cls_task.finetune_and_eval()
``` ```
**NOTE:** **NOTE:**
1. `outputs["pooled_output"]`返回了ERNIE/BERT模型对应的[CLS]向量,可以用于句子或句对的特征表达。 1. `outputs["pooled_output"]`返回了Transformer类预训练模型对应的[CLS]向量,可以用于句子或句对的特征表达。
2. `feed_list`中的inputs参数指名了ERNIE/BERT中的输入tensor的顺序,与ClassifyReader返回的结果一致。 2. `feed_list`中的inputs参数指名了Transformer类预训练模型中的输入tensor的顺序,与ClassifyReader返回的结果一致。
3. `hub.TextClassifierTask`通过输入特征,label与迁移的类别数,可以生成适用于文本分类的迁移任务`TextClassifierTask` 3. `hub.TextClassifierTask`通过输入特征,label与迁移的类别数,可以生成适用于文本分类的迁移任务`TextClassifierTask`
4. 使用预置网络与否,传入`hub.TextClassifierTask`的特征不相同。`hub.TextClassifierTask`通过参数`feature`和`token_feature`区分。
`feature`应是sentence-level特征,shape应为[-1, emb_size];`token_feature`是token-level特征,shape应为[-1, max_seq_len, emb_size]。
如果使用预置网络,则应取Transformer类预训练模型的sequence_output特征(`outputs["sequence_output"]`)。并且`hub.TextClassifierTask(token_feature=outputs["sequence_output"])`
如果不使用预置网络,直接通过fc网络进行分类,则应取Transformer类预训练模型的pooled_output特征(`outputs["pooled_output"]`)。并且`hub.TextClassifierTask(feature=outputs["pooled_output"])`
5. 使用预置网络时,可以通过`hub.TextClassifierTask`的参数`network`指定不同的网络结构。如下代码表示选择bilstm网络拼接在Transformer类预训练模型之后。
PaddleHub文本分类任务预置网络支持BOW,Bi-LSTM,CNN,DPCNN,GRU,LSTM。指定network应是其中之一。
```python
cls_task = hub.TextClassifierTask(
data_reader=reader,
token_feature=outputs["sequence_output"],
feed_list=feed_list,
network='bilstm',
num_classes=dataset.num_labels,
config=config,
metrics_choices=metrics_choices)
```
#### 自定义迁移任务 #### 自定义迁移任务
...@@ -190,29 +227,9 @@ python predict.py --checkpoint_dir $CKPT_DIR --max_seq_len 128 ...@@ -190,29 +227,9 @@ python predict.py --checkpoint_dir $CKPT_DIR --max_seq_len 128
``` ```
其中CKPT_DIR为Fine-tune API保存最佳模型的路径, max_seq_len是ERNIE模型的最大序列长度,*请与训练时配置的参数保持一致* 其中CKPT_DIR为Fine-tune API保存最佳模型的路径, max_seq_len是ERNIE模型的最大序列长度,*请与训练时配置的参数保持一致*
参数配置正确后,请执行脚本`sh run_predict.sh`,即可看到以下文本分类预测结果, 以及最终准确率。 参数配置正确后,请执行脚本`sh run_predict.sh`,即可看到文本分类预测结果。
如需了解更多预测步骤,请参考`predict.py`
```
这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般 predict=0
交通方便;环境很好;服务态度很好 房间较小 predict=1
19天硬盘就罢工了~~~算上运来的一周都没用上15天~~~可就是不能换了~~~唉~~~~你说这算什么事呀~~~ predict=0
```
我们在AI Studio上提供了IPython NoteBook形式的demo,您可以直接在平台上在线体验,链接如下: 我们在AI Studio上提供了IPython NoteBook形式的demo,点击[PaddleHub教程合集](https://aistudio.baidu.com/aistudio/projectdetail/231146),可使用AI Studio平台提供的GPU算力进行快速尝试。
|预训练模型|任务类型|数据集|AIStudio链接|备注|
|-|-|-|-|-|
|ResNet|图像分类|猫狗数据集DogCat|[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/147010)||
|ERNIE|文本分类|中文情感分类数据集ChnSentiCorp|[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/147006)||
|ERNIE|文本分类|中文新闻分类数据集THUNEWS|[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/221999)|本教程讲述了如何将自定义数据集加载,并利用Fine-tune API完成文本分类迁移学习。|
|ERNIE|序列标注|中文序列标注数据集MSRA_NER|[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/147009)||
|ERNIE|序列标注|中文快递单数据集Express|[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/184200)|本教程讲述了如何将自定义数据集加载,并利用Fine-tune API完成序列标注迁移学习。|
|ERNIE Tiny|文本分类|中文情感分类数据集ChnSentiCorp|[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/186443)||
|Senta|文本分类|中文情感分类数据集ChnSentiCorp|[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/216846)|本教程讲述了任何利用Senta和Fine-tune API完成情感分类迁移学习。|
|Senta|情感分析预测|N/A|[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/215814)||
|LAC|词法分析|N/A|[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/215711)||
|Ultra-Light-Fast-Generic-Face-Detector-1MB|人脸检测|N/A|[点击体验](https://aistudio.baidu.com/aistudio/projectdetail/215962)||
## 超参优化AutoDL Finetuner ## 超参优化AutoDL Finetuner
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Finetuning on classification task """ """Fine-tuning on classification task """
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
...@@ -32,7 +32,7 @@ parser = argparse.ArgumentParser(__doc__) ...@@ -32,7 +32,7 @@ parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint") parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.") parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.") parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for finetuning, input should be True or False") parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for fine-tuning, input should be True or False")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.") parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
args = parser.parse_args() args = parser.parse_args()
# yapf: enable. # yapf: enable.
...@@ -70,7 +70,7 @@ if __name__ == '__main__': ...@@ -70,7 +70,7 @@ if __name__ == '__main__':
inputs["input_mask"].name, inputs["input_mask"].name,
] ]
# Setup runing config for PaddleHub Finetune API # Setup RunConfig for PaddleHub Fine-tune API
config = hub.RunConfig( config = hub.RunConfig(
use_data_parallel=args.use_data_parallel, use_data_parallel=args.use_data_parallel,
use_cuda=args.use_gpu, use_cuda=args.use_gpu,
...@@ -78,7 +78,7 @@ if __name__ == '__main__': ...@@ -78,7 +78,7 @@ if __name__ == '__main__':
checkpoint_dir=args.checkpoint_dir, checkpoint_dir=args.checkpoint_dir,
strategy=hub.AdamWeightDecayStrategy()) strategy=hub.AdamWeightDecayStrategy())
# Define a classfication finetune task by PaddleHub's API # Define a classfication fine-tune task by PaddleHub's API
cls_task = hub.TextClassifierTask( cls_task = hub.TextClassifierTask(
data_reader=reader, data_reader=reader,
feature=pooled_output, feature=pooled_output,
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Finetuning on classification task """ """Fine-tuning on classification task """
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
...@@ -32,7 +32,7 @@ parser = argparse.ArgumentParser(__doc__) ...@@ -32,7 +32,7 @@ parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint") parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.") parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.") parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for finetuning, input should be True or False") parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for fine-tuning, input should be True or False")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.") parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
parser.add_argument("--network", type=str, default='bilstm', help="Pre-defined network which was connected after Transformer model, such as ERNIE, BERT ,RoBERTa and ELECTRA.") parser.add_argument("--network", type=str, default='bilstm', help="Pre-defined network which was connected after Transformer model, such as ERNIE, BERT ,RoBERTa and ELECTRA.")
args = parser.parse_args() args = parser.parse_args()
...@@ -71,7 +71,7 @@ if __name__ == '__main__': ...@@ -71,7 +71,7 @@ if __name__ == '__main__':
inputs["input_mask"].name, inputs["input_mask"].name,
] ]
# Setup runing config for PaddleHub Finetune API # Setup RunConfig for PaddleHub Fine-tune API
config = hub.RunConfig( config = hub.RunConfig(
use_data_parallel=args.use_data_parallel, use_data_parallel=args.use_data_parallel,
use_cuda=args.use_gpu, use_cuda=args.use_gpu,
...@@ -79,7 +79,7 @@ if __name__ == '__main__': ...@@ -79,7 +79,7 @@ if __name__ == '__main__':
checkpoint_dir=args.checkpoint_dir, checkpoint_dir=args.checkpoint_dir,
strategy=hub.AdamWeightDecayStrategy()) strategy=hub.AdamWeightDecayStrategy())
# Define a classfication finetune task by PaddleHub's API # Define a classfication fine-tune task by PaddleHub's API
# network choice: bilstm, bow, cnn, dpcnn, gru, lstm (PaddleHub pre-defined network) # network choice: bilstm, bow, cnn, dpcnn, gru, lstm (PaddleHub pre-defined network)
# If you wanna add network after ERNIE/BERT/RoBERTa/ELECTRA module, # If you wanna add network after ERNIE/BERT/RoBERTa/ELECTRA module,
# you must use the outputs["sequence_output"] as the token_feature of TextClassifierTask, # you must use the outputs["sequence_output"] as the token_feature of TextClassifierTask,
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Finetuning on classification task """ """Fine-tuning on classification task """
import argparse import argparse
import ast import ast
...@@ -21,7 +21,7 @@ import paddlehub as hub ...@@ -21,7 +21,7 @@ import paddlehub as hub
# yapf: disable # yapf: disable
parser = argparse.ArgumentParser(__doc__) parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.") parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for finetuning, input should be True or False") parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for fine-tuning, input should be True or False")
parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.") parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.") parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.")
parser.add_argument("--warmup_proportion", type=float, default=0.1, help="Warmup proportion params for warmup strategy") parser.add_argument("--warmup_proportion", type=float, default=0.1, help="Warmup proportion params for warmup strategy")
...@@ -68,13 +68,13 @@ if __name__ == '__main__': ...@@ -68,13 +68,13 @@ if __name__ == '__main__':
inputs["input_mask"].name, inputs["input_mask"].name,
] ]
# Select finetune strategy, setup config and finetune # Select fine-tune strategy, setup config and fine-tune
strategy = hub.AdamWeightDecayStrategy( strategy = hub.AdamWeightDecayStrategy(
warmup_proportion=args.warmup_proportion, warmup_proportion=args.warmup_proportion,
weight_decay=args.weight_decay, weight_decay=args.weight_decay,
learning_rate=args.learning_rate) learning_rate=args.learning_rate)
# Setup runing config for PaddleHub Finetune API # Setup RunConfig for PaddleHub Fine-tune API
config = hub.RunConfig( config = hub.RunConfig(
use_data_parallel=args.use_data_parallel, use_data_parallel=args.use_data_parallel,
use_cuda=args.use_gpu, use_cuda=args.use_gpu,
...@@ -83,7 +83,7 @@ if __name__ == '__main__': ...@@ -83,7 +83,7 @@ if __name__ == '__main__':
checkpoint_dir=args.checkpoint_dir, checkpoint_dir=args.checkpoint_dir,
strategy=strategy) strategy=strategy)
# Define a classification finetune task by PaddleHub's API # Define a classification fine-tune task by PaddleHub's API
cls_task = hub.TextClassifierTask( cls_task = hub.TextClassifierTask(
data_reader=reader, data_reader=reader,
feature=pooled_output, feature=pooled_output,
...@@ -92,6 +92,6 @@ if __name__ == '__main__': ...@@ -92,6 +92,6 @@ if __name__ == '__main__':
config=config, config=config,
metrics_choices=metrics_choices) metrics_choices=metrics_choices)
# Finetune and evaluate by PaddleHub's API # Fine-tune and evaluate by PaddleHub's API
# will finish training, evaluation, testing, save model automatically # will finish training, evaluation, testing, save model automatically
cls_task.finetune_and_eval() cls_task.finetune_and_eval()
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""Finetuning on classification task """ """Fine-tuning on classification task """
import argparse import argparse
import ast import ast
...@@ -21,7 +21,7 @@ import paddlehub as hub ...@@ -21,7 +21,7 @@ import paddlehub as hub
# yapf: disable # yapf: disable
parser = argparse.ArgumentParser(__doc__) parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.") parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for finetuning, input should be True or False") parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for fine-tuning, input should be True or False")
parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.") parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.") parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.")
parser.add_argument("--warmup_proportion", type=float, default=0.1, help="Warmup proportion params for warmup strategy") parser.add_argument("--warmup_proportion", type=float, default=0.1, help="Warmup proportion params for warmup strategy")
...@@ -69,13 +69,13 @@ if __name__ == '__main__': ...@@ -69,13 +69,13 @@ if __name__ == '__main__':
inputs["input_mask"].name, inputs["input_mask"].name,
] ]
# Select finetune strategy, setup config and finetune # Select fine-tune strategy, setup config and fine-tune
strategy = hub.AdamWeightDecayStrategy( strategy = hub.AdamWeightDecayStrategy(
warmup_proportion=args.warmup_proportion, warmup_proportion=args.warmup_proportion,
weight_decay=args.weight_decay, weight_decay=args.weight_decay,
learning_rate=args.learning_rate) learning_rate=args.learning_rate)
# Setup runing config for PaddleHub Finetune API # Setup RunConfig for PaddleHub Fine-tune API
config = hub.RunConfig( config = hub.RunConfig(
use_data_parallel=args.use_data_parallel, use_data_parallel=args.use_data_parallel,
use_cuda=args.use_gpu, use_cuda=args.use_gpu,
...@@ -84,7 +84,7 @@ if __name__ == '__main__': ...@@ -84,7 +84,7 @@ if __name__ == '__main__':
checkpoint_dir=args.checkpoint_dir, checkpoint_dir=args.checkpoint_dir,
strategy=strategy) strategy=strategy)
# Define a classification finetune task by PaddleHub's API # Define a classification fine-tune task by PaddleHub's API
# network choice: bilstm, bow, cnn, dpcnn, gru, lstm (PaddleHub pre-defined network) # network choice: bilstm, bow, cnn, dpcnn, gru, lstm (PaddleHub pre-defined network)
# If you wanna add network after ERNIE/BERT/RoBERTa/ELECTRA module, # If you wanna add network after ERNIE/BERT/RoBERTa/ELECTRA module,
# you must use the outputs["sequence_output"] as the token_feature of TextClassifierTask, # you must use the outputs["sequence_output"] as the token_feature of TextClassifierTask,
...@@ -98,6 +98,6 @@ if __name__ == '__main__': ...@@ -98,6 +98,6 @@ if __name__ == '__main__':
config=config, config=config,
metrics_choices=metrics_choices) metrics_choices=metrics_choices)
# Finetune and evaluate by PaddleHub's API # Fine-tune and evaluate by PaddleHub's API
# will finish training, evaluation, testing, save model automatically # will finish training, evaluation, testing, save model automatically
cls_task.finetune_and_eval() cls_task.finetune_and_eval()
...@@ -13,22 +13,22 @@ Task的基本方法和属性参见[BaseTask](base_task.md)。 ...@@ -13,22 +13,22 @@ Task的基本方法和属性参见[BaseTask](base_task.md)。
PaddleHub预置了常见任务的Task,每种Task都有自己特有的应用场景以及提供了对应的度量指标,用于适应用户的不同需求。预置的任务类型如下: PaddleHub预置了常见任务的Task,每种Task都有自己特有的应用场景以及提供了对应的度量指标,用于适应用户的不同需求。预置的任务类型如下:
* 图像分类任务 * 图像分类任务
[ImageClassifierTask]() [ImageClassifierTask](image_classify_task.md)
* 文本分类任务 * 文本分类任务
[TextClassifierTask]() [TextClassifierTask](text_classify_task.md)
* 序列标注任务 * 序列标注任务
[SequenceLabelTask]() [SequenceLabelTask](sequence_label_task.md)
* 多标签分类任务 * 多标签分类任务
[MultiLabelClassifierTask]() [MultiLabelClassifierTask](multi_lable_classify_task.md)
* 回归任务 * 回归任务
[RegressionTask]() [RegressionTask](regression_task.md)
* 阅读理解任务 * 阅读理解任务
[ReadingComprehensionTask]() [ReadingComprehensionTask](reading_comprehension_task.md)
## 自定义Task ## 自定义Task
如果这些Task不支持您的特定需求,您也可以通过继承BaseTask来实现自己的任务,具体实现细节参见[自定义Task]() 如果这些Task不支持您的特定需求,您也可以通过继承BaseTask来实现自己的任务,具体实现细节参见[自定义Task](../../tutorial/how_to_define_task.md)以及[修改Task中的模型网络](../../tutorial/define_task_example.md)
## 修改Task内置方法 ## 修改Task内置方法
如果Task内置方法不满足您的需求,您可以通过Task支持的Hook机制修改方法实现,详细信息参见[修改Task内置方法]() 如果Task内置方法不满足您的需求,您可以通过Task支持的Hook机制修改方法实现,详细信息参见[修改Task内置方法](../../tutorial/hook.md)
...@@ -2,23 +2,28 @@ ...@@ -2,23 +2,28 @@
文本分类任务Task,继承自[BaseTask](base_task.md),该Task基于输入的特征,添加一个Dropout层,以及一个或多个全连接层来创建一个文本分类任务用于finetune,度量指标为准确率,损失函数为交叉熵Loss。 文本分类任务Task,继承自[BaseTask](base_task.md),该Task基于输入的特征,添加一个Dropout层,以及一个或多个全连接层来创建一个文本分类任务用于finetune,度量指标为准确率,损失函数为交叉熵Loss。
```python ```python
hub.TextClassifierTask( hub.TextClassifierTask(
feature,
num_classes, num_classes,
feed_list, feed_list,
data_reader, data_reader,
feature=None,
token_feature=None,
startup_program=None, startup_program=None,
config=None, config=None,
hidden_units=None, hidden_units=None,
network=None,
metrics_choices="default"): metrics_choices="default"):
``` ```
**参数** **参数**
* feature (fluid.Variable): 输入的特征矩阵。
* num_classes (int): 分类任务的类别数量 * num_classes (int): 分类任务的类别数量
* feed_list (list): 待feed变量的名字列表 * feed_list (list): 待feed变量的名字列表
* data_reader: 提供数据的Reader * data_reader: 提供数据的Reader,可选为ClassifyReader和LACClassifyReader。
* feature(fluid.Variable): 输入的sentence-level特征矩阵,shape应为[-1, emb_size]。默认为None。
* token_feature(fluid.Variable): 输入的token-level特征矩阵,shape应为[-1, seq_len, emb_size]。默认为None。feature和token_feature须指定其中一个。
* network(str): 文本分类任务PaddleHub预置网络,支持BOW,Bi-LSTM,CNN,DPCNN,GRU,LSTM。如果指定network,则应使用token_feature作为输入特征。
* startup_program (fluid.Program): 存储了模型参数初始化op的Program,如果未提供,则使用fluid.default_startup_program() * startup_program (fluid.Program): 存储了模型参数初始化op的Program,如果未提供,则使用fluid.default_startup_program()
* config ([RunConfig](../config.md)): 运行配置 * config ([RunConfig](../config.md)): 运行配置,如设置batch_size,epoch,learning_rate等。
* hidden_units (list): TextClassifierTask最终的全连接层输出维度为label_size,是每个label的概率值。在这个全连接层之前可以设置额外的全连接层,并指定它们的输出维度,例如hidden_units=[4,2]表示先经过一层输出维度为4的全连接层,再输入一层输出维度为2的全连接层,最后再输入输出维度为label_size的全连接层。 * hidden_units (list): TextClassifierTask最终的全连接层输出维度为label_size,是每个label的概率值。在这个全连接层之前可以设置额外的全连接层,并指定它们的输出维度,例如hidden_units=[4,2]表示先经过一层输出维度为4的全连接层,再输入一层输出维度为2的全连接层,最后再输入输出维度为label_size的全连接层。
* metrics_choices("default" or list ⊂ ["acc", "f1", "matthews"]): 任务训练过程中需要计算的评估指标,默认为“default”,此时等效于["acc"]。metrics_choices支持训练过程中同时评估多个指标,其中指定的第一个指标将被作为主指标用于判断当前得分是否为最佳分值,例如["matthews", "acc"],"matthews"将作为主指标,参与最佳模型的判断中;“acc”只计算并输出,不参与最佳模型的判断。 * metrics_choices("default" or list ⊂ ["acc", "f1", "matthews"]): 任务训练过程中需要计算的评估指标,默认为“default”,此时等效于["acc"]。metrics_choices支持训练过程中同时评估多个指标,其中指定的第一个指标将被作为主指标用于判断当前得分是否为最佳分值,例如["matthews", "acc"],"matthews"将作为主指标,参与最佳模型的判断中;“acc”只计算并输出,不参与最佳模型的判断。
...@@ -28,4 +33,4 @@ hub.TextClassifierTask( ...@@ -28,4 +33,4 @@ hub.TextClassifierTask(
**示例** **示例**
[文本分类](https://github.com/PaddlePaddle/PaddleHub/blob/release/v1.4/demo/text_classification/text_classifier.py) [文本分类](../../../demo/text_classification/text_cls.py)
...@@ -95,7 +95,7 @@ label_list.txt的格式如下 ...@@ -95,7 +95,7 @@ label_list.txt的格式如下
``` ```
示例: 示例:
[DogCat数据集](https://github.com/PaddlePaddle/PaddleHub/wiki/PaddleHub-API:-Dataset#class-hubdatasetdogcatdataset)为示例,train_list.txt/test_list.txt/validate_list.txt内容如下示例 [DogCat数据集](../reference/dataset.md#class-hubdatasetdogcatdataset)为示例,train_list.txt/test_list.txt/validate_list.txt内容如下示例
``` ```
cat/3270.jpg 0 cat/3270.jpg 0
cat/646.jpg 0 cat/646.jpg 0
......
...@@ -22,6 +22,8 @@ if six.PY2: ...@@ -22,6 +22,8 @@ if six.PY2:
reload(sys) # noqa reload(sys) # noqa
sys.setdefaultencoding("UTF-8") sys.setdefaultencoding("UTF-8")
from .version import hub_version as __version__
from . import module from . import module
from . import common from . import common
from . import io from . import io
......
...@@ -77,7 +77,8 @@ def dpcnn(token_embeddings, ...@@ -77,7 +77,8 @@ def dpcnn(token_embeddings,
emb_dim=1024, emb_dim=1024,
blocks=6): blocks=6):
""" """
deepcnn net deepcnn net implemented as ACL2017 'Deep Pyramid Convolutional Neural Networks for Text Categorization'
For more information, please refer to https://www.aclweb.org/anthology/P17-1052.pdf.
""" """
def _block(x): def _block(x):
......
...@@ -13,5 +13,5 @@ ...@@ -13,5 +13,5 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" PaddleHub version string """ """ PaddleHub version string """
hub_version = "1.6.2" hub_version = "1.7.0"
module_proto_version = "1.0.0" module_proto_version = "1.0.0"
pre-commit pre-commit
protobuf >= 3.6.0 protobuf >= 3.6.0
yapf == 0.26.0 yapf == 0.26.0
pyyaml
Pillow
six >= 1.10.0 six >= 1.10.0
chardet == 3.0.4
requests
flask >= 1.1.0 flask >= 1.1.0
flake8 flake8
visualdl == 2.0.0a0 visualdl == 2.0.0a0
cma == 2.7.0 cma >= 2.7.0
sentencepiece sentencepiece
nltk
colorlog colorlog
opencv-python
# numpy no longer supports Python 2 in version 1.17 and above
numpy ; python_version >= "3"
numpy < 1.17.0 ; python_version < "3"
# pandas no longer supports Python 2 in version 0.25 and above # pandas no longer supports Python 2 in version 0.25 and above
pandas ; python_version >= "3" pandas ; python_version >= "3"
pandas < 0.25.0 ; python_version < "3"
# gunicorn does not support Windows # gunicorn does not support Windows
gunicorn >= 19.10.0; sys_platform != "win32" gunicorn >= 19.10.0; sys_platform != "win32"
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册