# iterable_dataset.py
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
16 17
import tempfile

18
import numpy as np
19

20 21
import paddle
import paddle.nn.functional as F
22
from paddle import nn
23
from paddle.distributed.fleet import auto
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38

# Switch Paddle to static-graph mode before any mesh or model is created.
paddle.enable_static()
# Process meshes: one covering process ids 0 and 1, plus one single-process
# mesh each; PP_MESH_0 / PP_MESH_1 are passed to auto.shard_op in
# MLPLayer.forward.
global_process_mesh = auto.ProcessMesh(mesh=[0, 1])
PP_MESH_0 = auto.ProcessMesh([0])
PP_MESH_1 = auto.ProcessMesh([1])
# Sizes shared by the datasets, the model and train() below.
batch_size = 2
batch_num = 10
hidden_size = 1024
sequence_len = 512  # NOTE(review): not referenced in the visible code
image_size = hidden_size  # length of each randomly generated input vector
class_num = 10  # used to bound the randomly generated labels

# Seed Paddle's random generator. NumPy's generator, which the datasets draw
# from, is not seeded anywhere in the visible code.
paddle.seed(44)


39
class MyDataset(paddle.io.IterableDataset):
    """Iterable dataset that yields randomly generated (input, label) pairs.

    Args:
        num_samples: Number of pairs produced by one pass over the dataset.
    """

    def __init__(self, num_samples):
        self.num_samples = num_samples

    def __iter__(self):
        """Yield ``num_samples`` pairs of a float32 vector and an int64 label.

        The vector has ``image_size`` elements drawn uniformly from [0, 1);
        the label is a class id drawn from [0, class_num).
        """
        for _ in range(self.num_samples):
            features = np.random.uniform(size=image_size).astype("float32")
            # randint's upper bound is exclusive: pass class_num itself so
            # that the last class id (class_num - 1) can also be generated.
            label = np.random.randint(0, class_num, dtype="int64")
            yield features, label


50
class MyDataset1(paddle.io.Dataset):
    """Map-style dataset whose items are stacked pairs of random samples.

    Every item is a tuple ``(input, label)`` where ``input`` stacks two
    float32 vectors of ``image_size`` elements and ``label`` stacks two
    int64 class ids. All items are generated eagerly in ``__init__``.

    NOTE(review): train() fits this dataset with ``batch_size=None``, so each
    item presumably serves as an already-assembled batch -- confirm.

    Args:
        num_samples: Number of items to generate.
    """

    def __init__(self, num_samples):
        self.num_samples = num_samples
        self.data = []
        samples_per_item = 2
        for _ in range(self.num_samples):
            features = []
            labels = []
            # Draw order is kept as input, label, input, label.
            for _ in range(samples_per_item):
                features.append(
                    np.random.uniform(size=image_size).astype("float32")
                )
                # randint's upper bound is exclusive: pass class_num itself
                # so that the last class id can also be generated.
                labels.append(
                    np.array(np.random.randint(0, class_num, dtype="int64"))
                )
            self.data.append((np.stack(features), np.stack(labels)))

    def __getitem__(self, idx):
        """Return the pre-generated ``(input, label)`` item at ``idx``."""
        return self.data[idx]

    def __len__(self):
        """Return the number of pre-generated items."""
        return len(self.data)


class MLPLayer(nn.Layer):
    """LayerNorm -> Linear -> GELU -> Linear -> Dropout -> Linear(1) stack.

    Args:
        hidden_size: Feature size of the input and of the second linear
            layer's output.
        intermediate_size: Output size of the first linear layer.
        dropout_ratio: Probability handed to ``nn.Dropout``.
        initializer_range: Standard deviation of the normal initializer used
            for the weights of all three linear layers.
    """

    def __init__(
        self,
        hidden_size=1024,
        intermediate_size=4 * 1024,
        dropout_ratio=0.1,
        initializer_range=0.02,
    ):
        super().__init__()
        # One ParamAttr object is shared by every linear layer below.
        normal_init = nn.initializer.Normal(mean=0.0, std=initializer_range)
        shared_weight_attr = paddle.ParamAttr(initializer=normal_init)

        # Sub-layers are created in the same order as before.
        self.linear0 = nn.Linear(
            hidden_size,
            intermediate_size,
            weight_attr=shared_weight_attr,
            bias_attr=None,
        )
        self.linear1 = nn.Linear(
            intermediate_size,
            hidden_size,
            weight_attr=shared_weight_attr,
            bias_attr=None,
        )
        self.linear2 = nn.Linear(
            hidden_size,
            1,
            weight_attr=shared_weight_attr,
            bias_attr=None,
        )
        self.norm = nn.LayerNorm(hidden_size, epsilon=1e-5)
        self.dropout = nn.Dropout(dropout_ratio, mode="upscale_in_train")

    def forward(self, input):
        """Apply the stack to ``input`` and return the final projection.

        The layer norm is wrapped with ``auto.shard_op`` on ``PP_MESH_0`` and
        the second linear layer on ``PP_MESH_1``. The result is also stored
        on ``self.out``.
        """
        normalized = auto.shard_op(self.norm, PP_MESH_0)(input)
        activated = F.gelu(self.linear0(normalized), approximate=True)
        projected = auto.shard_op(self.linear1, PP_MESH_1)(activated)
        out = self.linear2(self.dropout(projected))
        self.out = out
        return out


def train(fetch):
    """Run fit, evaluate, predict and save through an ``auto.Engine``.

    Builds an ``MLPLayer`` with a cross-entropy loss, an Adam optimizer and
    an accuracy metric. It fits on ``MyDataset`` and then on ``MyDataset1``,
    evaluates and predicts on fresh ``MyDataset`` instances, and finally
    saves the model into a temporary directory.

    Args:
        fetch: Accepted for interface compatibility; not read by this
            function.
    """
    mlp = MLPLayer(
        hidden_size=hidden_size,
        intermediate_size=4 * hidden_size,
        dropout_ratio=0.1,
        initializer_range=0.02,
    )
    loss = paddle.nn.CrossEntropyLoss()
    optimizer = paddle.optimizer.Adam(
        learning_rate=0.00001,
        beta1=0.9,
        beta2=0.999,
        epsilon=1e-08,
        grad_clip=None,
    )

    dist_strategy = auto.Strategy()
    dist_strategy.auto_mode = "semi"
    dist_strategy.split_data = True

    # init engine
    engine = auto.Engine(
        mlp, loss, optimizer, paddle.metric.Accuracy(), strategy=dist_strategy
    )

    # train: first the iterable dataset, batched by the engine
    train_dataset = MyDataset(batch_num * batch_size)
    engine.fit(train_dataset, epochs=2, batch_size=batch_size)

    # then the map-style dataset, passed with batch_size=None
    train_dataset1 = MyDataset1(batch_size * batch_num)
    engine.fit(train_dataset1, epochs=2, batch_size=None)

    # eval
    eval_dataset = MyDataset(batch_size)
    engine.evaluate(eval_dataset, batch_size=batch_size)

    # predict
    test_dataset = MyDataset(batch_size)
    engine.predict(test_dataset, batch_size=batch_size)

    # save: the context manager removes the directory even if save() raises
    with tempfile.TemporaryDirectory() as temp_dir:
        model_filename = os.path.join(temp_dir, 'mlp_inf')
        engine.save(model_filename, training=False)


# Run the full fit / evaluate / predict / save flow when executed as a script.
if __name__ == "__main__":
    train(fetch=True)