PaddlePaddle / DeepSpeech
Commit 521e222d
Authored Mar 11, 2022 by KP

    Add mdtc model.

Parent: 87ef68f1
Showing 3 changed files with 244 additions and 0 deletions (+244, -0)
paddlespeech/kws/__init__.py          +13   -0
paddlespeech/kws/models/__init__.py   +13   -0
paddlespeech/kws/models/mdtc.py       +218  -0
paddlespeech/kws/__init__.py (new file, mode 100644)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
paddlespeech/kws/models/__init__.py (new file, mode 100644)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
paddlespeech/kws/models/mdtc.py (new file, mode 100644)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

class DSDilatedConv1d(nn.Layer):
    """Depthwise-separable dilated 1-D convolution: a per-channel
    (grouped) dilated conv followed by a 1x1 pointwise projection."""

    def __init__(
            self,
            in_channels: int,
            out_channels: int,
            kernel_size: int,
            dilation: int=1,
            stride: int=1,
            bias: bool=True, ):
        super(DSDilatedConv1d, self).__init__()
        # Frames consumed on the time axis by the unpadded dilated conv.
        self.receptive_fields = dilation * (kernel_size - 1)
        self.conv = nn.Conv1D(
            in_channels,
            in_channels,
            kernel_size,
            padding=0,
            dilation=dilation,
            stride=stride,
            groups=in_channels,
            bias_attr=bias, )
        self.bn = nn.BatchNorm1D(in_channels)
        self.pointwise = nn.Conv1D(
            in_channels,
            out_channels,
            kernel_size=1,
            padding=0,
            dilation=1,
            bias_attr=bias)

    def forward(self, inputs: paddle.Tensor):
        outputs = self.conv(inputs)
        outputs = self.bn(outputs)
        outputs = self.pointwise(outputs)
        return outputs
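
The depthwise stage above uses groups=in_channels, so each channel is
filtered independently before the 1x1 pointwise mix, and with padding=0 the
time axis shrinks by dilation * (kernel_size - 1) frames. A minimal shape
check, assuming a working paddlepaddle install (the sizes here are
illustrative, not from the commit):

    import paddle
    from paddlespeech.kws.models.mdtc import DSDilatedConv1d

    conv = DSDilatedConv1d(
        in_channels=80, out_channels=64, kernel_size=5, dilation=2)
    x = paddle.randn([4, 80, 100])  # (batch, channels, time)
    y = conv(x)
    # 2 * (5 - 1) = 8 frames are consumed, so time goes 100 -> 92.
    print(y.shape)  # [4, 64, 92]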

class TCNBlock(nn.Layer):
    """Residual TCN block: DSDilatedConv1d -> BN -> ReLU -> 1x1 conv -> BN,
    with the input cropped to match the shrunken output before the skip
    addition."""

    def __init__(
            self,
            in_channels: int,
            res_channels: int,
            kernel_size: int,
            dilation: int,
            causal: bool, ):
        super(TCNBlock, self).__init__()
        self.in_channels = in_channels
        self.res_channels = res_channels
        self.kernel_size = kernel_size
        self.dilation = dilation
        self.causal = causal
        self.receptive_fields = dilation * (kernel_size - 1)
        self.half_receptive_fields = self.receptive_fields // 2
        self.conv1 = DSDilatedConv1d(
            in_channels=in_channels,
            out_channels=res_channels,
            kernel_size=kernel_size,
            dilation=dilation, )
        self.bn1 = nn.BatchNorm1D(res_channels)
        self.relu1 = nn.ReLU()

        self.conv2 = nn.Conv1D(
            in_channels=res_channels, out_channels=res_channels, kernel_size=1)
        self.bn2 = nn.BatchNorm1D(res_channels)
        self.relu2 = nn.ReLU()

    def forward(self, inputs: paddle.Tensor):
        outputs = self.relu1(self.bn1(self.conv1(inputs)))
        outputs = self.bn2(self.conv2(outputs))
        if self.causal:
            # Causal: drop only leading frames so no future context is used.
            inputs = inputs[:, :, self.receptive_fields:]
        else:
            # Non-causal: crop symmetrically around the center.
            inputs = inputs[:, :, self.half_receptive_fields:
                            -self.half_receptive_fields]
        if self.in_channels == self.res_channels:
            res_out = self.relu2(outputs + inputs)
        else:
            # Channel counts differ, so the residual connection is skipped.
            res_out = self.relu2(outputs)
        return res_out
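
Note how the skip path is cropped rather than the conv path padded: the
residual input drops receptive_fields leading frames in causal mode, or
half_receptive_fields from each side otherwise, and the addition only
happens when in_channels == res_channels. A quick check under the same
illustrative assumptions as above:

    import paddle
    from paddlespeech.kws.models.mdtc import TCNBlock

    block = TCNBlock(
        in_channels=64, res_channels=64, kernel_size=5, dilation=4,
        causal=True)
    y = block(paddle.randn([4, 64, 100]))
    # receptive_fields = 4 * (5 - 1) = 16, so time goes 100 -> 84.
    print(y.shape)  # [4, 64, 84]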

class TCNStack(nn.Layer):
    """A stack of TCNBlocks whose dilations cycle through powers of two."""

    def __init__(
            self,
            in_channels: int,
            stack_num: int,
            stack_size: int,
            res_channels: int,
            kernel_size: int,
            causal: bool, ):
        super(TCNStack, self).__init__()
        self.in_channels = in_channels
        self.stack_num = stack_num
        self.stack_size = stack_size
        self.res_channels = res_channels
        self.kernel_size = kernel_size
        self.causal = causal
        self.res_blocks = self.stack_tcn_blocks()
        self.receptive_fields = self.calculate_receptive_fields()
        self.res_blocks = nn.Sequential(*self.res_blocks)

    def calculate_receptive_fields(self):
        receptive_fields = 0
        for block in self.res_blocks:
            receptive_fields += block.receptive_fields
        return receptive_fields

    def build_dilations(self):
        # stack_size repetitions of [2**0, 2**1, ..., 2**(stack_num - 1)].
        dilations = []
        for s in range(0, self.stack_size):
            for l in range(0, self.stack_num):
                dilations.append(2**l)
        return dilations

    def stack_tcn_blocks(self):
        dilations = self.build_dilations()
        res_blocks = nn.LayerList()

        # The first block maps in_channels -> res_channels; the rest keep
        # res_channels so every later block has a residual connection.
        res_blocks.append(
            TCNBlock(
                self.in_channels,
                self.res_channels,
                self.kernel_size,
                dilations[0],
                self.causal, ))
        for dilation in dilations[1:]:
            res_blocks.append(
                TCNBlock(
                    self.res_channels,
                    self.res_channels,
                    self.kernel_size,
                    dilation,
                    self.causal, ))
        return res_blocks

    def forward(self, inputs: paddle.Tensor):
        outputs = self.res_blocks(inputs)
        return outputs
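
The dilation schedule is what gives the stack its exponentially growing
context: build_dilations repeats [1, 2, 4, ..., 2**(stack_num - 1)]
stack_size times, and the stack's receptive field is the sum of
dilation * (kernel_size - 1) over all blocks. A plain-Python sketch of that
arithmetic, with illustrative values:

    stack_size, stack_num, kernel_size = 2, 4, 5
    dilations = [2**l for _ in range(stack_size) for l in range(stack_num)]
    print(dilations)  # [1, 2, 4, 8, 1, 2, 4, 8]
    print(sum(d * (kernel_size - 1) for d in dilations))  # 120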

class MDTC(nn.Layer):
    """Multi-scale depthwise temporal convolution (MDTC) backbone for
    keyword spotting: a preprocessor TCNBlock followed by stack_num
    TCNStacks whose outputs are aligned in time and summed."""

    def __init__(
            self,
            stack_num: int,
            stack_size: int,
            in_channels: int,
            res_channels: int,
            kernel_size: int,
            causal: bool, ):
        super(MDTC, self).__init__()
        assert kernel_size % 2 == 1
        self.kernel_size = kernel_size
        self.causal = causal
        self.preprocessor = TCNBlock(
            in_channels, res_channels, kernel_size, dilation=1, causal=causal)
        self.relu = nn.ReLU()
        self.blocks = nn.LayerList()
        self.receptive_fields = self.preprocessor.receptive_fields
        for i in range(stack_num):
            self.blocks.append(
                TCNStack(res_channels, stack_size, 1, res_channels,
                         kernel_size, causal))
            self.receptive_fields += self.blocks[-1].receptive_fields
        self.half_receptive_fields = self.receptive_fields // 2

    def forward(self, x: paddle.Tensor):
        # Pad the time axis so the output keeps the input length: all
        # padding on the left if causal, split evenly across both sides
        # otherwise.
        if self.causal:
            outputs = F.pad(x, (0, 0, self.receptive_fields, 0, 0, 0),
                            'constant')
        else:
            outputs = F.pad(
                x,
                (0, 0, self.half_receptive_fields, self.half_receptive_fields,
                 0, 0),
                'constant', )
        outputs = outputs.transpose([0, 2, 1])  # (B, T, C) -> (B, C, T)

        # Collect the output of every stack for multi-scale fusion.
        outputs_list = []
        outputs = self.relu(self.preprocessor(outputs))
        for block in self.blocks:
            outputs = block(outputs)
            outputs_list.append(outputs)

        # Crop every intermediate output to the length of the last (shortest)
        # one: from the left if causal, symmetrically otherwise.
        normalized_outputs = []
        output_size = outputs_list[-1].shape[-1]
        for x in outputs_list:
            remove_length = x.shape[-1] - output_size
            if self.causal and remove_length > 0:
                normalized_outputs.append(x[:, :, remove_length:])
            elif not self.causal and remove_length > 1:
                half_remove_length = remove_length // 2
                normalized_outputs.append(
                    x[:, :, half_remove_length:-half_remove_length])
            else:
                normalized_outputs.append(x)

        # Sum the aligned multi-scale features.
        outputs = paddle.zeros_like(
            outputs_list[-1], dtype=outputs_list[-1].dtype)
        for x in normalized_outputs:
            outputs += x
        outputs = outputs.transpose([0, 2, 1])  # back to (B, T, C)
        return outputs, None
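
Because forward pads by exactly the model's total receptive field before the
convolutions consume it, the returned feature sequence keeps the input's
time length; the second return value is a None placeholder. An end-to-end
sketch with hypothetical sizes (e.g. 80-dim fbank features), assuming a
working paddlepaddle install:

    import paddle
    from paddlespeech.kws.models.mdtc import MDTC

    model = MDTC(stack_num=3, stack_size=4, in_channels=80,
                 res_channels=32, kernel_size=5, causal=True)
    feats = paddle.randn([8, 100, 80])  # (batch, time, feature)
    hidden, _ = model(feats)
    print(hidden.shape)  # [8, 100, 32]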