PaddlePaddle / PaddleFL · Issue #7
Closed · Opened Oct 29, 2019 by saxon_zh (Guest)

Running fl_server fails with "PaddleCheckError: Expected optimize_blocks.size() >= 1, but received optimize_blocks.size():0 < 1:1."

Created by: XDUXK

I am trying to rewrite the training of an existing SSD object-detection model (https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/ssd) as a federated-learning job. Running fl_master reports no error, but running fl_server afterwards fails with "PaddleCheckError: Expected optimize_blocks.size() >= 1, but received optimize_blocks.size():0 < 1:1.", and I cannot tell where the problem is. The relevant code is pasted below.

1. fl_master.py

import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
import paddle_fl as fl
from paddle_fl.core.master.job_generator import JobGenerator
from paddle_fl.core.strategy.fl_strategy_base import FLStrategyFactory


batch_size = 2  # 64
train_images = 10  # 16551
lr = 0.001
lr_epochs = [40, 60, 80, 100]
lr_decay = [1, 0.5, 0.25, 0.1, 0.01]
image_shape = [3, 300, 300]
class_num = 6  # 21
ap_version = '11point'


class NetSSD:
    def __init__(self, image, class_num, image_shape):
        self.img = image
        self.num_classes = class_num
        self.img_shape = image_shape

    def ssd_net(self, scale=1.0):
        # 300x300
        tmp = self.conv_bn(self.img, 3, int(32 * scale), 2, 1)
        # 150x150
        tmp = self.depthwise_separable(tmp, 32, 64, 32, 1, scale)
        tmp = self.depthwise_separable(tmp, 64, 128, 64, 2, scale)
        # 75x75
        tmp = self.depthwise_separable(tmp, 128, 128, 128, 1, scale)
        tmp = self.depthwise_separable(tmp, 128, 256, 128, 2, scale)
        # 38x38
        tmp = self.depthwise_separable(tmp, 256, 256, 256, 1, scale)
        tmp = self.depthwise_separable(tmp, 256, 512, 256, 2, scale)

        # 19x19
        for i in range(5):
            tmp = self.depthwise_separable(tmp, 512, 512, 512, 1, scale)
        module11 = tmp
        tmp = self.depthwise_separable(tmp, 512, 1024, 512, 2, scale)

        # 10x10
        module13 = self.depthwise_separable(tmp, 1024, 1024, 1024, 1, scale)
        module14 = self.extra_block(module13, 256, 512, 1, 2)
        # 5x5
        module15 = self.extra_block(module14, 128, 256, 1, 2)
        # 3x3
        module16 = self.extra_block(module15, 128, 256, 1, 2)
        # 2x2
        module17 = self.extra_block(module16, 64, 128, 1, 2)

        mbox_locs, mbox_confs, box, box_var = fluid.layers.multi_box_head(
            inputs=[
                module11, module13, module14, module15, module16, module17
            ],
            image=self.img,
            num_classes=self.num_classes,
            min_ratio=20,
            max_ratio=90,
            min_sizes=[60.0, 105.0, 150.0, 195.0, 240.0, 285.0],
            max_sizes=[[], 150.0, 195.0, 240.0, 285.0, 300.0],
            aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.],
                           [2., 3.]],
            base_size=self.img_shape[2],
            offset=0.5,
            flip=True)

        # gt_box and gt_label are the module-level variables produced by
        # fluid.layers.read_file(py_reader) below; ssd_net() is only
        # called after they are created.
        self.loss = fluid.layers.ssd_loss(mbox_locs, mbox_confs, gt_box,
                                          gt_label, box, box_var)
        self.loss = fluid.layers.reduce_sum(self.loss)
        self.startup_program = fluid.default_startup_program()

    def conv_bn(self,
                input,
                filter_size,
                num_filters,
                stride,
                padding,
                num_groups=1,
                act='relu',
                use_cudnn=True):
        parameter_attr = ParamAttr(learning_rate=0.1, initializer=MSRA())
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=parameter_attr,
            bias_attr=False)
        return fluid.layers.batch_norm(input=conv, act=act)

    def depthwise_separable(self, input, num_filters1, num_filters2, num_groups,
                            stride, scale):
        depthwise_conv = self.conv_bn(
            input=input,
            filter_size=3,
            num_filters=int(num_filters1 * scale),
            stride=stride,
            padding=1,
            num_groups=int(num_groups * scale),
            use_cudnn=False)

        pointwise_conv = self.conv_bn(
            input=depthwise_conv,
            filter_size=1,
            num_filters=int(num_filters2 * scale),
            stride=1,
            padding=0)
        return pointwise_conv

    def extra_block(self, input, num_filters1, num_filters2, num_groups, stride):
        # 1x1 conv
        pointwise_conv = self.conv_bn(
            input=input,
            filter_size=1,
            num_filters=int(num_filters1),
            stride=1,
            num_groups=int(num_groups),
            padding=0)

        # 3x3 conv
        normal_conv = self.conv_bn(
            input=pointwise_conv,
            filter_size=3,
            num_filters=int(num_filters2),
            stride=2,
            num_groups=int(num_groups),
            padding=1)
        return normal_conv


def optimizer_setting():
    iters = train_images // batch_size
    boundaries = [i * iters for i in lr_epochs]
    values = [i * lr for i in lr_decay]

    optimizer = fluid.optimizer.RMSProp(
        learning_rate=fluid.layers.piecewise_decay(boundaries, values),
        regularization=fluid.regularizer.L2Decay(0.00005))

    return optimizer

py_reader = fluid.layers.py_reader(
    capacity=64,
    shapes=[[-1] + image_shape, [-1, 4], [-1, 1], [-1, 1]],
    lod_levels=[0, 1, 1, 1],
    dtypes=["float32", "float32", "int32", "int32"],
    use_double_buffer=True)
image, gt_box, gt_label, difficult = fluid.layers.read_file(py_reader)

ssd_model = NetSSD(image, class_num, image_shape)
ssd_model.ssd_net()

job_generator = JobGenerator()
optimizer = optimizer_setting()

job_generator.set_optimizer(optimizer)
job_generator.set_losses([ssd_model.loss])
job_generator.set_startup_program(ssd_model.startup_program)
job_generator.set_infer_feed_and_target_names(
    [ssd_model.img.name], [ssd_model.loss.name])

build_strategy = FLStrategyFactory()
build_strategy.fed_avg = True
build_strategy.inner_step = 1
strategy = build_strategy.create_fl_strategy()

# endpoints will be collected through the cluster
# in this example, we suppose endpoints have been collected
endpoints = ["127.0.0.1:8181"]
output = "fl_job_config"
job_generator.generate_fl_job(
    strategy, server_endpoints=endpoints, worker_num=2, output=output)
# fl_job_config will be dispatched to workers
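
One difference from the bundled PaddleFL demos that I am not sure about: the demos define their inputs with fluid.layers.data, while I use py_reader. For reference, a minimal sketch of the data-layer style for this model (the variable names here are my own choice, not taken from the demos):

import paddle.fluid as fluid

# Input definition in the style of the official PaddleFL demos
# (plain data layers instead of py_reader); gt_box and gt_label are
# LoD tensors, hence lod_level=1.
image = fluid.layers.data(
    name='image', shape=[3, 300, 300], dtype='float32')
gt_box = fluid.layers.data(
    name='gt_box', shape=[4], dtype='float32', lod_level=1)
gt_label = fluid.layers.data(
    name='gt_label', shape=[1], dtype='int32', lod_level=1)
difficult = fluid.layers.data(
    name='difficult', shape=[1], dtype='int32', lod_level=1)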

2. fl_server.py

import paddle_fl as fl
import paddle.fluid as fluid
from paddle_fl.core.server.fl_server import FLServer
from paddle_fl.core.master.fl_job import FLRunTimeJob

server = FLServer()
server_id = 0
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_server_job(job_path, server_id)
server.set_server_job(job)
server.start()  # running this is what reports the PaddleCheckError above
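
As far as I can tell from the message, the failing check is on the parameter-server side, which expects the server program to contain at least one optimize block, so the generated job apparently placed no optimize ops on the server. To see what actually ended up in the saved server program, a dump along these lines might help (the file name is a guess; point it at whatever generate_fl_job wrote under fl_job_config/server0):

import paddle.fluid as fluid

# Hypothetical file name: use the serialized server program that
# generate_fl_job saved under fl_job_config/server0.
with open("fl_job_config/server0/server.main.program", "rb") as f:
    prog = fluid.Program.parse_from_string(f.read())

# List every op in every block to check for optimize ops.
for i, block in enumerate(prog.blocks):
    print("block %d:" % i, [op.type for op in block.ops])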

3. fl_trainer.py

from paddle_fl.core.trainer.fl_trainer import FLTrainerFactory
from paddle_fl.core.master.fl_job import FLRunTimeJob
import paddle.fluid as fluid
import numpy as np
import sys
import os
import logging
import ssd_reader

logging.basicConfig(
    filename="test.log",
    filemode="w",
    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
    datefmt="%d-%m-%Y %H:%M:%S",  # was %M (minutes); month is %m
    level=logging.DEBUG)


trainer_id = int(sys.argv[1]) # trainer id for each guest
use_gpu = False
batch_size = 64
epoch_num = 120
dataset = 'mydata'
model_save_dir = 'model/model_trainer_%d' % trainer_id
pretrained_model = 'pretrained/ssd_mobilenet_v1_coco/'
ap_version = '11point'
image_shape = [3, 300, 300]
mean_BGR = [127.5, 127.5, 127.5]
data_dir = 'data/data_%d/%s' % (trainer_id, dataset)
label_file = 'label_list.txt'
train_file_list = 'all.txt'


data_args = ssd_reader.Settings(
        dataset=dataset,
        data_dir=data_dir,
        label_file=label_file,
        resize_h=image_shape[1],
        resize_w=image_shape[2],
        mean_value=mean_BGR,
        apply_distort=True,
        apply_expand=True,
        ap_version=ap_version)



place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
trainer = FLTrainerFactory().create_fl_trainer(job)
trainer.start()

train_reader = ssd_reader.train(data_args,
                                train_file_list,
                                batch_size,
                                shuffle=True,
                                use_multiprocess=False,
                                num_workers=1)

step_i = 0
while not trainer.stop():
    step_i += 1
    # step_i counts training rounds (outer passes), not batches
    print("round %d start train" % step_i)
    for data in train_reader():
        ret_avg_cost = trainer.run(feed=data, fetch=[])
        # The perplexity bookkeeping carried over from the gru4rec demo
        # is commented out: with fetch=[] the returned list is empty and
        # np.exp(ret_avg_cost[0]) would raise an IndexError (see the
        # loss-fetching sketch below).
        # avg_ppl = np.exp(ret_avg_cost[0])
        # newest_ppl = np.mean(avg_ppl)
        # print("ppl:%.3f" % newest_ppl)
    save_dir = (model_save_dir + "/epoch_%d") % step_i
    if trainer_id == 0:
        print("start save")
        trainer.save_inference_program(save_dir)
    if step_i >= 40:
        break
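
Since the perplexity bookkeeping does not fit a detection loss anyway, this is roughly the loop I would expect if the SSD loss should be monitored -- assuming the inputs were defined as named fluid.layers.data variables (see the sketch after fl_master.py); the fetch name and the trainer attribute used to resolve names are both guesses that have to be checked against the generated job:

import numpy as np
import paddle.fluid as fluid

# Guess: print ssd_model.loss.name in fl_master.py to get the real name.
ssd_loss_name = "reduce_sum_0.tmp_0"

# A DataFeeder turns the reader's (image, gt_box, gt_label, difficult)
# tuples into the feed dict that trainer.run expects; resolving string
# names needs the trainer's loaded main program (the attribute below
# is hypothetical).
feeder = fluid.DataFeeder(
    feed_list=["image", "gt_box", "gt_label", "difficult"],
    place=place,
    program=trainer._main_program)

for data in train_reader():
    loss = trainer.run(feed=feeder.feed(data), fetch=[ssd_loss_name])
    print("loss: %.3f" % float(np.array(loss[0]).mean()))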