save_inference_model ERROR
Created by: llcau
为使您的问题得到快速解决,在建立Issues前,请您先通过如下方式搜索是否有相似问题:【搜索issue关键字】【使用labels筛选】【官方文档】
如果您没有查询到相似问题,为快速解决您的提问,建立issue时请提供如下细节信息:
- 标题:简洁、精准概括您的问题,例如“Insufficient Memory xxx" ”
- 版本、环境信息: 1)PaddlePaddle版本:请提供您的PaddlePaddle版本号,例如1.1或CommitID 安装版本:paddlepaddle-1.5.1-cp27-cp27mu-linux_x86_64.whl 2)CPU:预测若用CPU,请提供CPU型号,MKL/OpenBlas/MKLDNN/等数学库使用情况 核数:28 Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz 3)GPU:预测若用GPU,请提供GPU型号、CUDA和CUDNN版本号 4)系统环境:请您描述系统类型、版本,例如Mac OS 10.14,Python版本 x86_64 GNU/Linux python2.7
- 代码:
import paddle
import paddle.fluid as fluid
import logging
import time
import numpy as np
import os
import sys
import reader
import nets
use_cuda = False
learn_rate = 1e-4
model_save_path = "./output/models"
num_epochs = 20
batch_size = 256
train_sample_rate = 1
train_data_dir = "./data/train_data"
test_data_dir = "./data/test_data"
parallel = True
def main():
logging.basicConfig(level = logging.NOTSET)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
# train
train_program = fluid.Program()
train_startup = fluid.Program()
with fluid.program_guard(main_program = train_program, startup_program = train_startup):
with fluid.unique_name.guard():
[avg_cost, acc, predict, train_reader] = nets.bilstm_attention_classify_net(is_test = False, is_py_reader = True)
optimizer = fluid.optimizer.Adam(learning_rate = learn_rate)
optimizer.minimize(avg_cost)
# test
test_program = fluid.Program()
test_startup = fluid.Program()
with fluid.program_guard(main_program = test_program, startup_program = test_startup):
with fluid.unique_name.guard():
[test_avg_cost, test_acc, test_predict, test_reader] = nets.bilstm_attention_classify_net(is_test = True, is_py_reader = True)
infer_program = train_program.clone()
fluid.memory_optimize(train_program)
fluid.memory_optimize(test_program)
executor = fluid.Executor(place)
executor.run(train_startup)
executor.run(test_startup)
if parallel:
train_exe = fluid.ParallelExecutor(
use_cuda = use_cuda, loss_name = avg_cost.name,
main_program = train_program)
else:
print "not parallel, train_exe..."
train_exe = executor
#device_count = train_exe.device_count
#logging.info("device count: %d" % device_count)
#logging.info("start train process ...")
if parallel:
test_exe = fluid.ParallelExecutor(
use_cuda = use_cuda,
share_vars_from = train_exe,
main_program = test_program)
else:
print "not parallel, test_exe..."
test_exe = executor
train_reader.decorate_paddle_reader(
paddle.batch(
reader.reader(data_dir = train_data_dir,
sample_rate = train_sample_rate),
batch_size,
drop_last = False))
test_reader.decorate_paddle_reader(
paddle.batch(
reader.reader(data_dir = test_data_dir),
batch_size,
drop_last = False))
for epoch_id in range(num_epochs):
losses = []
start_time = time.time()
train_reader.start()
iter = 0
try:
while True:
avg_loss = train_exe.run([avg_cost.name])
print("epoch: %d, iter: %d, loss: %f" % (epoch_id, iter, np.mean(avg_loss[0])))
losses.append(np.mean(avg_loss[0]))
iter += 1
except fluid.core.EOFException:
train_reader.reset()
end_time = time.time()
print("epoch: %d, loss: %f, used time: %d sec"
% (epoch_id, np.mean(losses), end_time - start_time))
logging.info("start test process ...")
losses = []
test_reader.start()
try:
while True:
(avg_loss, avg_acc) = test_exe.run([test_avg_cost.name, test_acc.name])
losses.append((np.mean(avg_loss[0]), np.mean(avg_acc[0])))
except fluid.core.EOFException:
test_reader.reset()
print ("test at epoch: %d, loss: %f, acc: %f"
% (epoch_id, np.mean([i[0] for i in losses]),
np.mean([i[1] for i in losses])))
logging.info("start save process ...")
model_path = os.path.join(model_save_path, str(epoch_id))
if not os.path.exists(model_save_path):
os.makedirs(model_save_path)
fluid.io.save_inference_model(
dirname = model_path,
feeded_var_names = ["text"],
target_vars = [test_predict],
executor = executor)
#main_program = infer_program)
if __name__ == '__main__':
main()
- 复现信息:如为报错,请给出复现环境、复现步骤 执行命令:fluid script/train.py
- 问题描述:请详细描述您的问题,同步贴出报错信息、日志、可复现的代码片段 报错信息如下: memory_optimize is deprecated. Use CompiledProgram and Executor memory_optimize is deprecated. Use CompiledProgram and Executor The CPU_NUM is not specified, you should set CPU_NUM in the environment variable list, i.e export CPU_NUM=1. CPU_NUM indicates that how many CPUPlace are used in the current task. !!! The default number of CPUPlaces is 1.
I0910 11:38:03.192989 27478 parallel_executor.cc:329] The number of CPUPlace, which is used in ParallelExecutor, is 1. And the Program will be copied 1 copies I0910 11:38:03.197156 27478 build_strategy.cc:340] SeqOnlyAllReduceOps:0, num_trainers:1 share_vars_from is set, scope is ignored. I0910 11:38:03.201052 27478 parallel_executor.cc:329] The number of CPUPlace, which is used in ParallelExecutor, is 1. And the Program will be copied 1 copies I0910 11:38:03.201953 27478 build_strategy.cc:340] SeqOnlyAllReduceOps:0, num_trainers:1 loaded train_data epoch: 0, iter: 0, loss: 0.574403 epoch: 0, loss: 0.574403, used time: 3 sec INFO:root:start test process ... loaded test_data test at epoch: 0, loss: 0.582142, acc: 0.600000 INFO:root:start save process ... Traceback (most recent call last): File "script/train.py", line 141, in main() File "script/train.py", line 120, in main executor = executor) File "/home/work/lixin46/paddle/paddle_release_home/python/lib/python2.7/site-packages/paddle/fluid/io.py", line 1071, in save_inference_model prepend_feed_ops(main_program, feeded_var_names) File "/home/work/lixin46/paddle/paddle_release_home/python/lib/python2.7/site-packages/paddle/fluid/io.py", line 884, in prepend_feed_ops out = global_block.var(name) File "/home/work/lixin46/paddle/paddle_release_home/python/lib/python2.7/site-packages/paddle/fluid/framework.py", line 1556, in var raise ValueError("var %s not in this block" % name) ValueError: var text not in this block