diff --git a/tutorials/notebook/data_loading_enhance/data_loading_enhancement.ipynb b/tutorials/notebook/data_loading_enhance/data_loading_enhancement.ipynb
index 963496357fcc01cec077e26d97d3770bbac7b896..fb562620846ff291b4c5438bb568de8baf6af099 100644
--- a/tutorials/notebook/data_loading_enhance/data_loading_enhancement.ipynb
+++ b/tutorials/notebook/data_loading_enhance/data_loading_enhancement.ipynb
@@ -119,7 +119,7 @@
 "source": [
 "- ### repeat\n",
 "\n",
-"在有限的数据集内,为了优化网络,通常会将一个数据集训练多次。加倍数据集,通常用在多个`epoch`训练中,通过`repeat`来加倍数据量。\n",
+"在有限的数据集内,为了优化网络,通常会将一个数据集训练多次。加倍数据集,通过`repeat`来加倍数据量。\n",
 "\n",
 "我们可以定义`ds2`数据集,调用`repeat`来加倍数据量。其中,将倍数设为2,故`ds3`数据量为原始数据集`ds2`的2倍。"
 ]
diff --git a/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb b/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb
index b1d3a2a1d177ebafd990c1cca45c16559b7ac55b..29100b841a1002724e8bcdc65b21d99aea7113c2 100644
--- a/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb
+++ b/tutorials/notebook/mindinsight/mindinsight_model_lineage_and_data_lineage.ipynb
@@ -425,7 +425,7 @@
 "    mnist_path = \"./MNIST_Data\"\n",
 "    \n",
 "    net_loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean')\n",
-"    repeat_size = epoch_size\n",
+"    repeat_size = 1\n",
 "    # create the network\n",
 "    network = LeNet5()\n",
 "\n",
@@ -702,4 +702,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 4
-}
+}
\ No newline at end of file
diff --git a/tutorials/notebook/nlp_application.ipynb b/tutorials/notebook/nlp_application.ipynb
index d09dd09ff4d4a43004c58bac40d2697ac22b642a..0d9c01f0071471974532021b1823cc9854e36cf0 100644
--- a/tutorials/notebook/nlp_application.ipynb
+++ b/tutorials/notebook/nlp_application.ipynb
@@ -567,7 +567,7 @@
 "\n",
 "    return data_set\n",
 "\n",
-"ds_train = lstm_create_dataset(args.preprocess_path, cfg.batch_size, cfg.num_epochs)"
+"ds_train = lstm_create_dataset(args.preprocess_path, cfg.batch_size)"
 ]
 },
 {
@@ -5143,4 +5143,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 4
-}
+}
\ No newline at end of file
diff --git a/tutorials/source_en/advanced_use/computer_vision_application.md b/tutorials/source_en/advanced_use/computer_vision_application.md
index c51b3db4a2f9ef739aa32bef7bdd653c6c7a9626..80ea21413c44ee9eeb48ab5897121f1b6a97c447 100644
--- a/tutorials/source_en/advanced_use/computer_vision_application.md
+++ b/tutorials/source_en/advanced_use/computer_vision_application.md
@@ -203,7 +203,7 @@ The trained model file (such as `resnet.ckpt`) can be used to predict the class
 ```python
 param_dict = load_checkpoint(args_opt.checkpoint_path)
 load_param_into_net(net, param_dict)
-eval_dataset = create_dataset(1, training=False)
+eval_dataset = create_dataset(training=False)
 res = model.eval(eval_dataset)
 print("result: ", res)
 ```
diff --git a/tutorials/source_en/advanced_use/differential_privacy.md b/tutorials/source_en/advanced_use/differential_privacy.md
index c85f4c6b3c30d32c9d9c43f5c0abc783601cc369..836b52b16c3325d2d882d9453298f529de815141 100644
--- a/tutorials/source_en/advanced_use/differential_privacy.md
+++ b/tutorials/source_en/advanced_use/differential_privacy.md
@@ -234,8 +234,7 @@ ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet",
 
 # get training dataset
 ds_train = generate_mnist_dataset(os.path.join(cfg.data_path, "train"),
-                                  cfg.batch_size,
-                                  cfg.epoch_size)
+                                  cfg.batch_size)
 ```
 
 ### Introducing the Differential Privacy
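The hunks above, and every hunk that follows, decouple the pipeline repeat count from the epoch count: the epoch count is supplied only through `model.train(epoch_size, ...)`, while the pipeline keeps `repeat_size = 1`, a single pass over the data. A plain-Python sketch of how the step counts compare (the sample count, batch size, and epoch count are illustrative, not values taken from these tutorials):

```python
# Steps per epoch scale with how many passes the pipeline itself repeats.
samples, batch_size, epoch_size = 60000, 32, 5

for repeat_size in (epoch_size, 1):  # former setting vs. patched setting
    steps_per_epoch = samples * repeat_size // batch_size
    print(f"repeat_size={repeat_size}: {steps_per_epoch} steps/epoch, "
          f"{epoch_size * steps_per_epoch} steps across all epochs")
# repeat_size=5: 9375 steps/epoch, 46875 across all epochs (5 passes per epoch)
# repeat_size=1: 1875 steps/epoch, 9375 across all epochs (one pass per epoch)
```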
diff --git a/tutorials/source_en/advanced_use/distributed_training.md b/tutorials/source_en/advanced_use/distributed_training.md
index 119c82e95264fdaef43aaf6cd19d2229e58d7061..d98bdf3e3189a512ac3fc15146ad24631e9ee534 100644
--- a/tutorials/source_en/advanced_use/distributed_training.md
+++ b/tutorials/source_en/advanced_use/distributed_training.md
@@ -247,7 +247,7 @@ context.set_context(device_id=device_id) # set device_id
 def test_train_cifar(epoch_size=10):
     context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL, mirror_mean=True)
     loss_cb = LossMonitor()
-    dataset = create_dataset(data_path, epoch_size)
+    dataset = create_dataset(data_path)
     batch_size = 32
     num_classes = 10
     net = resnet50(batch_size, num_classes)
diff --git a/tutorials/source_en/advanced_use/nlp_application.md b/tutorials/source_en/advanced_use/nlp_application.md
index 02cdea4c2c81e0ebc7e4d7f8bdc670a1d4d81e71..bc531f49635c78ad50092691f1657f9ffeb50c8a 100644
--- a/tutorials/source_en/advanced_use/nlp_application.md
+++ b/tutorials/source_en/advanced_use/nlp_application.md
@@ -204,7 +204,7 @@ Load the corresponding dataset, configure the CheckPoint generation information,
 model = Model(network, loss, opt, {'acc': Accuracy()})
 
 print("============== Starting Training ==============")
-ds_train = lstm_create_dataset(args.preprocess_path, cfg.batch_size, cfg.num_epochs)
+ds_train = lstm_create_dataset(args.preprocess_path, cfg.batch_size)
 config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
                              keep_checkpoint_max=cfg.keep_checkpoint_max)
 ckpoint_cb = ModelCheckpoint(prefix="lstm", directory=args.ckpt_path, config=config_ck)
diff --git a/tutorials/source_en/quick_start/quick_start.md b/tutorials/source_en/quick_start/quick_start.md
index 500c9614e3fa7e445a33a0822c73895495ead5d6..7599bd4b9285f43db1458c18056bf1ee524e2318 100644
--- a/tutorials/source_en/quick_start/quick_start.md
+++ b/tutorials/source_en/quick_start/quick_start.md
@@ -355,7 +355,7 @@ if __name__ == "__main__":
     epoch_size = 1
     mnist_path = "./MNIST_Data"
 
-    repeat_size = epoch_size
+    repeat_size = 1
     model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
     train_net(args, model, epoch_size, mnist_path, repeat_size, ckpoint_cb, dataset_sink_mode)
     ...
diff --git a/tutorials/source_en/use/data_preparation/data_processing_and_augmentation.md b/tutorials/source_en/use/data_preparation/data_processing_and_augmentation.md
index c87e4863798c308870c117a77a9a5b04b688e441..b3fe180a5b809b1681c39df352d68a6f6fc1ebda 100644
--- a/tutorials/source_en/use/data_preparation/data_processing_and_augmentation.md
+++ b/tutorials/source_en/use/data_preparation/data_processing_and_augmentation.md
@@ -88,7 +88,7 @@ In limited datasets, to optimize the network, a dataset is usually trained for m
 
 > In machine learning, an epoch refers to one cycle through the full training dataset.
 
-During multiple epochs, `repeat` can be used to increase the data size. The definition of `repeat` is as follows:
+During training, `repeat` can be used to increase the data size. The definition of `repeat` is as follows:
 ```python
 def repeat(self, count=None):
 ```
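Per the signature just quoted, `repeat` takes an optional `count`: an integer multiplies the rows the pipeline yields, and the documented default of `None` repeats indefinitely. A minimal sketch, assuming a MindSpore environment (the toy generator and the `"data"` column name are invented for illustration):

```python
import numpy as np
import mindspore.dataset as ds

def gen():
    # Toy four-sample source standing in for a real dataset.
    for i in range(4):
        yield (np.array([i], dtype=np.float32),)

ds1 = ds.GeneratorDataset(gen, column_names=["data"])
ds2 = ds1.repeat(2)  # two passes over the source

print(ds2.get_dataset_size())  # 8: the pipeline reports the repeated row count
```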
diff --git a/tutorials/source_zh_cn/advanced_use/computer_vision_application.md b/tutorials/source_zh_cn/advanced_use/computer_vision_application.md
index 5b0d066e6a6e91c32645fa681d7a8496f422c143..dfc423ea48475c0c2a2306b860a79f0d88dfdd30 100644
--- a/tutorials/source_zh_cn/advanced_use/computer_vision_application.md
+++ b/tutorials/source_zh_cn/advanced_use/computer_vision_application.md
@@ -205,7 +205,7 @@ model.train(epoch_size, dataset, callbacks=[ckpoint_cb, loss_cb])
 ```python
 param_dict = load_checkpoint(args_opt.checkpoint_path)
 load_param_into_net(net, param_dict)
-eval_dataset = create_dataset(1, training=False)
+eval_dataset = create_dataset(training=False)
 res = model.eval(eval_dataset)
 print("result: ", res)
 ```
diff --git a/tutorials/source_zh_cn/advanced_use/differential_privacy.md b/tutorials/source_zh_cn/advanced_use/differential_privacy.md
index 74998d99282254d93306c4782602a8e9faa5b2f5..2ffd69446234c441e5f8ebbbf0399b1ca3fd9737 100644
--- a/tutorials/source_zh_cn/advanced_use/differential_privacy.md
+++ b/tutorials/source_zh_cn/advanced_use/differential_privacy.md
@@ -220,8 +220,7 @@ ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet",
 
 # get training dataset
 ds_train = generate_mnist_dataset(os.path.join(cfg.data_path, "train"),
-                                  cfg.batch_size,
-                                  cfg.epoch_size)
+                                  cfg.batch_size)
 ```
 
 ### 引入差分隐私
diff --git a/tutorials/source_zh_cn/advanced_use/distributed_training.md b/tutorials/source_zh_cn/advanced_use/distributed_training.md
index a0636cc6663c998249ddc6eacd92fd21ac4d12b5..423c979c4cb433602a30bb0e7cb669c81d591298 100644
--- a/tutorials/source_zh_cn/advanced_use/distributed_training.md
+++ b/tutorials/source_zh_cn/advanced_use/distributed_training.md
@@ -248,7 +248,7 @@ context.set_context(device_id=device_id) # set device_id
 def test_train_cifar(epoch_size=10):
     context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL, mirror_mean=True)
     loss_cb = LossMonitor()
-    dataset = create_dataset(data_path, epoch_size)
+    dataset = create_dataset(data_path)
     batch_size = 32
     num_classes = 10
     net = resnet50(batch_size, num_classes)
diff --git a/tutorials/source_zh_cn/advanced_use/nlp_application.md b/tutorials/source_zh_cn/advanced_use/nlp_application.md
index ae9f9bb7c1674e1d5698a78ac69ff988e7be8630..540f4e19096fcf08db98f43541b397cb5cc6bf7f 100644
--- a/tutorials/source_zh_cn/advanced_use/nlp_application.md
+++ b/tutorials/source_zh_cn/advanced_use/nlp_application.md
@@ -204,7 +204,7 @@ loss_cb = LossMonitor()
 model = Model(network, loss, opt, {'acc': Accuracy()})
 
 print("============== Starting Training ==============")
-ds_train = lstm_create_dataset(args.preprocess_path, cfg.batch_size, cfg.num_epochs)
+ds_train = lstm_create_dataset(args.preprocess_path, cfg.batch_size)
 config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
                              keep_checkpoint_max=cfg.keep_checkpoint_max)
 ckpoint_cb = ModelCheckpoint(prefix="lstm", directory=args.ckpt_path, config=config_ck)
diff --git a/tutorials/source_zh_cn/quick_start/quick_start.md b/tutorials/source_zh_cn/quick_start/quick_start.md
index 19a7c8e7a442b7760079e5297f2f1eb8788c8c8c..365c41ea814770a800be43064eca015652ed6b06 100644
--- a/tutorials/source_zh_cn/quick_start/quick_start.md
+++ b/tutorials/source_zh_cn/quick_start/quick_start.md
@@ -357,7 +357,7 @@ if __name__ == "__main__":
     epoch_size = 1
     mnist_path = "./MNIST_Data"
 
-    repeat_size = epoch_size
+    repeat_size = 1
     model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
     train_net(args, model, epoch_size, mnist_path, repeat_size, ckpoint_cb, dataset_sink_mode)
     ...
metrics={"Accuracy": Accuracy()}) train_net(args, model, epoch_size, mnist_path, repeat_size, ckpoint_cb, dataset_sink_mode) ... diff --git a/tutorials/source_zh_cn/use/data_preparation/data_processing_and_augmentation.md b/tutorials/source_zh_cn/use/data_preparation/data_processing_and_augmentation.md index b350eac747753ad950206af1bc93a5c4f1239ac4..fad8413cfd7a1c91e33b9e07bdd5d1ab9f3f7bf9 100644 --- a/tutorials/source_zh_cn/use/data_preparation/data_processing_and_augmentation.md +++ b/tutorials/source_zh_cn/use/data_preparation/data_processing_and_augmentation.md @@ -89,7 +89,7 @@ ds1 = ds1.repeat(10) > 在机器学习中,每训练完一个完整的数据集,我们称为训练完了一个epoch。 -加倍数据集,通常用在多个epoch(迭代)训练中,通过`repeat`来加倍数据量。`repeat`定义如下: +加倍数据集,通常用在训练中,通过`repeat`来加倍数据量。`repeat`定义如下: ```python def repeat(self, count=None): ``` diff --git a/tutorials/tutorial_code/distributed_training/resnet50_distributed_training.py b/tutorials/tutorial_code/distributed_training/resnet50_distributed_training.py index 2d7db4e52af9e5a8beba3a8c18314739750a3502..ec152dc17f8f9672f77196280b392954bfb83ee3 100644 --- a/tutorials/tutorial_code/distributed_training/resnet50_distributed_training.py +++ b/tutorials/tutorial_code/distributed_training/resnet50_distributed_training.py @@ -120,7 +120,7 @@ def test_train_cifar(epoch_size=10): context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL, mirror_mean=True) loss_cb = LossMonitor() data_path = os.getenv('DATA_PATH') - dataset = create_dataset(data_path, epoch_size) + dataset = create_dataset(data_path) batch_size = 32 num_classes = 10 net = resnet50(batch_size, num_classes) diff --git a/tutorials/tutorial_code/lenet.py b/tutorials/tutorial_code/lenet.py index 441f423360c179140f23767970e865e179a10a6c..5f5dfffb22d5e8f6f6a77250b73af033c8e32955 100644 --- a/tutorials/tutorial_code/lenet.py +++ b/tutorials/tutorial_code/lenet.py @@ -206,7 +206,7 @@ if __name__ == "__main__": mnist_path = "./MNIST_Data" # define the loss function net_loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction='mean') - repeat_size = epoch_size + repeat_size = 1 # create the network network = LeNet5() # define the optimizer diff --git a/tutorials/tutorial_code/resnet/cifar_resnet50.py b/tutorials/tutorial_code/resnet/cifar_resnet50.py index c77059041ca0402cfdf75b631723bb7307969c87..94cca8b461eb6d9336c4fdabb70bf19fdd8fbc9d 100644 --- a/tutorials/tutorial_code/resnet/cifar_resnet50.py +++ b/tutorials/tutorial_code/resnet/cifar_resnet50.py @@ -118,7 +118,7 @@ if __name__ == '__main__': # as for train, users could use model.train if args_opt.do_train: - dataset = create_dataset(epoch_size) + dataset = create_dataset() batch_num = dataset.get_dataset_size() config_ck = CheckpointConfig(save_checkpoint_steps=batch_num, keep_checkpoint_max=35) ckpoint_cb = ModelCheckpoint(prefix="train_resnet_cifar10", directory="./", config=config_ck) @@ -130,6 +130,6 @@ if __name__ == '__main__': if args_opt.checkpoint_path: param_dict = load_checkpoint(args_opt.checkpoint_path) load_param_into_net(net, param_dict) - eval_dataset = create_dataset(1, training=False) + eval_dataset = create_dataset(training=False) res = model.eval(eval_dataset) print("result: ", res) diff --git a/tutorials/tutorial_code/sample_for_cloud/resnet50_train.py b/tutorials/tutorial_code/sample_for_cloud/resnet50_train.py index 5d5d8b85068c6c1c5d8bdd03b35b78666cf870b9..2ec27f4862fb96c8209e574c6fbad58cfd7033f8 100644 --- a/tutorials/tutorial_code/sample_for_cloud/resnet50_train.py +++ b/tutorials/tutorial_code/sample_for_cloud/resnet50_train.py 
diff --git a/tutorials/tutorial_code/sample_for_cloud/resnet50_train.py b/tutorials/tutorial_code/sample_for_cloud/resnet50_train.py
index 5d5d8b85068c6c1c5d8bdd03b35b78666cf870b9..2ec27f4862fb96c8209e574c6fbad58cfd7033f8 100644
--- a/tutorials/tutorial_code/sample_for_cloud/resnet50_train.py
+++ b/tutorials/tutorial_code/sample_for_cloud/resnet50_train.py
@@ -130,7 +130,7 @@ def resnet50_train(args_opt):
     # create dataset
     print('Create train and evaluate dataset.')
     train_dataset = create_dataset(dataset_path=local_data_path, do_train=True,
-                                   repeat_num=epoch_size, batch_size=batch_size)
+                                   repeat_num=1, batch_size=batch_size)
     eval_dataset = create_dataset(dataset_path=local_data_path, do_train=False,
                                   repeat_num=1, batch_size=batch_size)
     train_step_size = train_dataset.get_dataset_size()