提交 6569ce12 编写于 作者: H Hui Zhang

lm embed and format code

上级 871fc5b7
...@@ -227,4 +227,4 @@ class AugmentationPipeline(): ...@@ -227,4 +227,4 @@ class AugmentationPipeline():
obj = class_obj(self._rng, **params) obj = class_obj(self._rng, **params)
except Exception: except Exception:
raise ValueError("Unknown augmentor type [%s]." % augmentor_type) raise ValueError("Unknown augmentor type [%s]." % augmentor_type)
return obj return obj
\ No newline at end of file
...@@ -24,6 +24,7 @@ __all__ = ["LoadInputsAndTargets"] ...@@ -24,6 +24,7 @@ __all__ = ["LoadInputsAndTargets"]
logger = Log(__name__).getlog() logger = Log(__name__).getlog()
class LoadInputsAndTargets(): class LoadInputsAndTargets():
"""Create a mini-batch from a list of dicts """Create a mini-batch from a list of dicts
......
...@@ -24,11 +24,11 @@ from deepspeech.decoders.scorers.scorer_interface import BatchScorerInterface ...@@ -24,11 +24,11 @@ from deepspeech.decoders.scorers.scorer_interface import BatchScorerInterface
from deepspeech.models.lm_interface import LMInterface from deepspeech.models.lm_interface import LMInterface
from deepspeech.modules.encoder import TransformerEncoder from deepspeech.modules.encoder import TransformerEncoder
from deepspeech.modules.mask import subsequent_mask from deepspeech.modules.mask import subsequent_mask
from deepspeech.utils.log import Log from deepspeech.utils.log import Log
logger = Log(__name__).getlog() logger = Log(__name__).getlog()
class TransformerLM(nn.Layer, LMInterface, BatchScorerInterface): class TransformerLM(nn.Layer, LMInterface, BatchScorerInterface):
def __init__( def __init__(
self, self,
......
...@@ -23,12 +23,14 @@ from deepspeech.utils.log import Log ...@@ -23,12 +23,14 @@ from deepspeech.utils.log import Log
logger = Log(__name__).getlog() logger = Log(__name__).getlog()
__all__ = [ __all__ = [
"PositionalEncodingInterface", "NoPositionalEncoding", "PositionalEncoding", "RelPositionalEncoding" "PositionalEncodingInterface", "NoPositionalEncoding", "PositionalEncoding",
"RelPositionalEncoding"
] ]
class PositionalEncodingInterface:
def forward(self, x:paddle.Tensor, offset: int=0) -> Tuple[paddle.Tensor, paddle.Tensor]: class PositionalEncodingInterface:
def forward(self, x: paddle.Tensor,
offset: int=0) -> Tuple[paddle.Tensor, paddle.Tensor]:
"""Compute positional encoding. """Compute positional encoding.
Args: Args:
x (paddle.Tensor): Input tensor (batch, time, `*`). x (paddle.Tensor): Input tensor (batch, time, `*`).
...@@ -37,8 +39,8 @@ class PositionalEncodingInterface: ...@@ -37,8 +39,8 @@ class PositionalEncodingInterface:
paddle.Tensor: Positional embedding tensor (1, time, `*`). paddle.Tensor: Positional embedding tensor (1, time, `*`).
""" """
raise NotImplementedError("forward method is not implemented") raise NotImplementedError("forward method is not implemented")
def position_encoding(self, offset:int, size:int) -> paddle.Tensor: def position_encoding(self, offset: int, size: int) -> paddle.Tensor:
""" For getting encoding in a streaming fashion """ For getting encoding in a streaming fashion
Args: Args:
offset (int): start offset offset (int): start offset
......
...@@ -32,7 +32,6 @@ from deepspeech.modules.encoder_layer import TransformerEncoderLayer ...@@ -32,7 +32,6 @@ from deepspeech.modules.encoder_layer import TransformerEncoderLayer
from deepspeech.modules.mask import add_optional_chunk_mask from deepspeech.modules.mask import add_optional_chunk_mask
from deepspeech.modules.mask import make_non_pad_mask from deepspeech.modules.mask import make_non_pad_mask
from deepspeech.modules.positionwise_feed_forward import PositionwiseFeedForward from deepspeech.modules.positionwise_feed_forward import PositionwiseFeedForward
from deepspeech.modules.subsampling import Conv2dSubsampling
from deepspeech.modules.subsampling import Conv2dSubsampling4 from deepspeech.modules.subsampling import Conv2dSubsampling4
from deepspeech.modules.subsampling import Conv2dSubsampling6 from deepspeech.modules.subsampling import Conv2dSubsampling6
from deepspeech.modules.subsampling import Conv2dSubsampling8 from deepspeech.modules.subsampling import Conv2dSubsampling8
...@@ -394,13 +393,8 @@ class TransformerEncoder(BaseEncoder): ...@@ -394,13 +393,8 @@ class TransformerEncoder(BaseEncoder):
if self.global_cmvn is not None: if self.global_cmvn is not None:
xs = self.global_cmvn(xs) xs = self.global_cmvn(xs)
if isinstance(self.embed, Conv2dSubsampling): #TODO(Hui Zhang): self.embed(xs, masks, offset=0), stride_slice not support bool tensor
#TODO(Hui Zhang): self.embed(xs, masks, offset=0), stride_slice not support bool tensor xs, pos_emb, masks = self.embed(xs, masks.astype(xs.dtype), offset=0)
xs, pos_emb, masks = self.embed(
xs, masks.astype(xs.dtype), offset=0)
else:
xs, pos_emb, masks = self.embed(
xs, masks.astype(xs.dtype), offset=0)
#TODO(Hui Zhang): remove mask.astype, stride_slice not support bool tensor #TODO(Hui Zhang): remove mask.astype, stride_slice not support bool tensor
masks = masks.astype(paddle.bool) masks = masks.astype(paddle.bool)
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io import io
import h5py import h5py
...@@ -9,16 +22,15 @@ class CMVN(): ...@@ -9,16 +22,15 @@ class CMVN():
"Apply Global/Spk CMVN/iverserCMVN." "Apply Global/Spk CMVN/iverserCMVN."
def __init__( def __init__(
self, self,
stats, stats,
norm_means=True, norm_means=True,
norm_vars=False, norm_vars=False,
filetype="mat", filetype="mat",
utt2spk=None, utt2spk=None,
spk2utt=None, spk2utt=None,
reverse=False, reverse=False,
std_floor=1.0e-20, std_floor=1.0e-20, ):
):
self.stats_file = stats self.stats_file = stats
self.norm_means = norm_means self.norm_means = norm_means
self.norm_vars = norm_vars self.norm_vars = norm_vars
...@@ -84,17 +96,14 @@ class CMVN(): ...@@ -84,17 +96,14 @@ class CMVN():
self.scale[spk] = 1 / std self.scale[spk] = 1 / std
def __repr__(self): def __repr__(self):
return ( return ("{name}(stats_file={stats_file}, "
"{name}(stats_file={stats_file}, " "norm_means={norm_means}, norm_vars={norm_vars}, "
"norm_means={norm_means}, norm_vars={norm_vars}, " "reverse={reverse})".format(
"reverse={reverse})".format( name=self.__class__.__name__,
name=self.__class__.__name__, stats_file=self.stats_file,
stats_file=self.stats_file, norm_means=self.norm_means,
norm_means=self.norm_means, norm_vars=self.norm_vars,
norm_vars=self.norm_vars, reverse=self.reverse, ))
reverse=self.reverse,
)
)
def __call__(self, x, uttid=None): def __call__(self, x, uttid=None):
if self.utt2spk is not None: if self.utt2spk is not None:
...@@ -121,6 +130,7 @@ class CMVN(): ...@@ -121,6 +130,7 @@ class CMVN():
class UtteranceCMVN(): class UtteranceCMVN():
"Apply Utterance CMVN" "Apply Utterance CMVN"
def __init__(self, norm_means=True, norm_vars=False, std_floor=1.0e-20): def __init__(self, norm_means=True, norm_vars=False, std_floor=1.0e-20):
self.norm_means = norm_means self.norm_means = norm_means
self.norm_vars = norm_vars self.norm_vars = norm_vars
...@@ -130,20 +140,19 @@ class UtteranceCMVN(): ...@@ -130,20 +140,19 @@ class UtteranceCMVN():
return "{name}(norm_means={norm_means}, norm_vars={norm_vars})".format( return "{name}(norm_means={norm_means}, norm_vars={norm_vars})".format(
name=self.__class__.__name__, name=self.__class__.__name__,
norm_means=self.norm_means, norm_means=self.norm_means,
norm_vars=self.norm_vars, norm_vars=self.norm_vars, )
)
def __call__(self, x, uttid=None): def __call__(self, x, uttid=None):
# x: [Time, Dim] # x: [Time, Dim]
square_sums = (x ** 2).sum(axis=0) square_sums = (x**2).sum(axis=0)
mean = x.mean(axis=0) mean = x.mean(axis=0)
if self.norm_means: if self.norm_means:
x = np.subtract(x, mean) x = np.subtract(x, mean)
if self.norm_vars: if self.norm_vars:
var = square_sums / x.shape[0] - mean ** 2 var = square_sums / x.shape[0] - mean**2
std = np.maximum(np.sqrt(var), self.std_floor) std = np.maximum(np.sqrt(var), self.std_floor)
x = np.divide(x, std) x = np.divide(x, std)
return x return x
\ No newline at end of file
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io import io
import logging import logging
import sys import sys
...@@ -10,11 +23,10 @@ from deepspeech.io.reader import SoundHDF5File ...@@ -10,11 +23,10 @@ from deepspeech.io.reader import SoundHDF5File
def file_reader_helper( def file_reader_helper(
rspecifier: str, rspecifier: str,
filetype: str = "mat", filetype: str="mat",
return_shape: bool = False, return_shape: bool=False,
segments: str = None, segments: str=None, ):
):
"""Read uttid and array in kaldi style """Read uttid and array in kaldi style
This function might be a bit confusing as "ark" is used This function might be a bit confusing as "ark" is used
...@@ -44,7 +56,8 @@ def file_reader_helper( ...@@ -44,7 +56,8 @@ def file_reader_helper(
""" """
if filetype == "mat": if filetype == "mat":
return KaldiReader(rspecifier, return_shape=return_shape, segments=segments) return KaldiReader(
rspecifier, return_shape=return_shape, segments=segments)
elif filetype == "hdf5": elif filetype == "hdf5":
return HDF5Reader(rspecifier, return_shape=return_shape) return HDF5Reader(rspecifier, return_shape=return_shape)
elif filetype == "sound.hdf5": elif filetype == "sound.hdf5":
...@@ -62,7 +75,8 @@ class KaldiReader: ...@@ -62,7 +75,8 @@ class KaldiReader:
self.segments = segments self.segments = segments
def __iter__(self): def __iter__(self):
with kaldiio.ReadHelper(self.rspecifier, segments=self.segments) as reader: with kaldiio.ReadHelper(
self.rspecifier, segments=self.segments) as reader:
for key, array in reader: for key, array in reader:
if self.return_shape: if self.return_shape:
array = array.shape array = array.shape
...@@ -72,9 +86,8 @@ class KaldiReader: ...@@ -72,9 +86,8 @@ class KaldiReader:
class HDF5Reader: class HDF5Reader:
def __init__(self, rspecifier, return_shape=False): def __init__(self, rspecifier, return_shape=False):
if ":" not in rspecifier: if ":" not in rspecifier:
raise ValueError( raise ValueError('Give "rspecifier" such as "ark:some.ark: {}"'.
'Give "rspecifier" such as "ark:some.ark: {}"'.format(self.rspecifier) format(self.rspecifier))
)
self.rspecifier = rspecifier self.rspecifier = rspecifier
self.ark_or_scp, self.filepath = self.rspecifier.split(":", 1) self.ark_or_scp, self.filepath = self.rspecifier.split(":", 1)
if self.ark_or_scp not in ["ark", "scp"]: if self.ark_or_scp not in ["ark", "scp"]:
...@@ -93,9 +106,7 @@ class HDF5Reader: ...@@ -93,9 +106,7 @@ class HDF5Reader:
raise RuntimeError( raise RuntimeError(
"scp file for hdf5 should be like: " "scp file for hdf5 should be like: "
'"uttid filepath.h5:key": {}({})'.format( '"uttid filepath.h5:key": {}({})'.format(
line, self.filepath line, self.filepath))
)
)
path, h5_key = value.split(":", 1) path, h5_key = value.split(":", 1)
hdf5_file = hdf5_dict.get(path) hdf5_file = hdf5_dict.get(path)
...@@ -110,9 +121,8 @@ class HDF5Reader: ...@@ -110,9 +121,8 @@ class HDF5Reader:
try: try:
data = hdf5_file[h5_key] data = hdf5_file[h5_key]
except Exception: except Exception:
logging.error( logging.error("Error when loading {} with key={}".
"Error when loading {} with key={}".format(path, h5_key) format(path, h5_key))
)
raise raise
if self.return_shape: if self.return_shape:
...@@ -144,9 +154,8 @@ class HDF5Reader: ...@@ -144,9 +154,8 @@ class HDF5Reader:
class SoundHDF5Reader: class SoundHDF5Reader:
def __init__(self, rspecifier, return_shape=False): def __init__(self, rspecifier, return_shape=False):
if ":" not in rspecifier: if ":" not in rspecifier:
raise ValueError( raise ValueError('Give "rspecifier" such as "ark:some.ark: {}"'.
'Give "rspecifier" such as "ark:some.ark: {}"'.format(rspecifier) format(rspecifier))
)
self.ark_or_scp, self.filepath = rspecifier.split(":", 1) self.ark_or_scp, self.filepath = rspecifier.split(":", 1)
if self.ark_or_scp not in ["ark", "scp"]: if self.ark_or_scp not in ["ark", "scp"]:
raise ValueError(f"Must be scp or ark: {self.ark_or_scp}") raise ValueError(f"Must be scp or ark: {self.ark_or_scp}")
...@@ -163,9 +172,7 @@ class SoundHDF5Reader: ...@@ -163,9 +172,7 @@ class SoundHDF5Reader:
raise RuntimeError( raise RuntimeError(
"scp file for hdf5 should be like: " "scp file for hdf5 should be like: "
'"uttid filepath.h5:key": {}({})'.format( '"uttid filepath.h5:key": {}({})'.format(
line, self.filepath line, self.filepath))
)
)
path, h5_key = value.split(":", 1) path, h5_key = value.split(":", 1)
hdf5_file = hdf5_dict.get(path) hdf5_file = hdf5_dict.get(path)
...@@ -180,9 +187,8 @@ class SoundHDF5Reader: ...@@ -180,9 +187,8 @@ class SoundHDF5Reader:
try: try:
data = hdf5_file[h5_key] data = hdf5_file[h5_key]
except Exception: except Exception:
logging.error( logging.error("Error when loading {} with key={}".
"Error when loading {} with key={}".format(path, h5_key) format(path, h5_key))
)
raise raise
# Change Tuple[ndarray, int] -> Tuple[int, ndarray] # Change Tuple[ndarray, int] -> Tuple[int, ndarray]
...@@ -214,14 +220,12 @@ class SoundHDF5Reader: ...@@ -214,14 +220,12 @@ class SoundHDF5Reader:
class SoundReader: class SoundReader:
def __init__(self, rspecifier, return_shape=False): def __init__(self, rspecifier, return_shape=False):
if ":" not in rspecifier: if ":" not in rspecifier:
raise ValueError( raise ValueError('Give "rspecifier" such as "scp:some.scp: {}"'.
'Give "rspecifier" such as "scp:some.scp: {}"'.format(rspecifier) format(rspecifier))
)
self.ark_or_scp, self.filepath = rspecifier.split(":", 1) self.ark_or_scp, self.filepath = rspecifier.split(":", 1)
if self.ark_or_scp != "scp": if self.ark_or_scp != "scp":
raise ValueError( raise ValueError('Only supporting "scp" for sound file: {}'.format(
'Only supporting "scp" for sound file: {}'.format(self.ark_or_scp) self.ark_or_scp))
)
self.return_shape = return_shape self.return_shape = return_shape
def __iter__(self): def __iter__(self):
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from collections.abc import Sequence from collections.abc import Sequence
from distutils.util import strtobool as dist_strtobool from distutils.util import strtobool as dist_strtobool
import sys
import numpy import numpy
...@@ -36,10 +49,9 @@ def get_commandline_args(): ...@@ -36,10 +49,9 @@ def get_commandline_args():
# Escape the extra characters for shell # Escape the extra characters for shell
argv = [ argv = [
arg.replace("'", "'\\''") arg.replace("'", "'\\''") if all(char not in arg
if all(char not in arg for char in extra_chars) for char in extra_chars) else
else "'" + arg.replace("'", "'\\''") + "'" "'" + arg.replace("'", "'\\''") + "'" for arg in sys.argv
for arg in sys.argv
] ]
return sys.executable + " " + " ".join(argv) return sys.executable + " " + " ".join(argv)
...@@ -47,19 +59,12 @@ def get_commandline_args(): ...@@ -47,19 +59,12 @@ def get_commandline_args():
def is_scipy_wav_style(value): def is_scipy_wav_style(value):
# If Tuple[int, numpy.ndarray] or not # If Tuple[int, numpy.ndarray] or not
return ( return (isinstance(value, Sequence) and len(value) == 2 and
isinstance(value, Sequence) isinstance(value[0], int) and isinstance(value[1], numpy.ndarray))
and len(value) == 2
and isinstance(value[0], int)
and isinstance(value[1], numpy.ndarray)
)
def assert_scipy_wav_style(value): def assert_scipy_wav_style(value):
assert is_scipy_wav_style( assert is_scipy_wav_style(
value value), "Must be Tuple[int, numpy.ndarray], but got {}".format(
), "Must be Tuple[int, numpy.ndarray], but got {}".format( type(value) if not isinstance(value, Sequence) else "{}[{}]".format(
type(value) type(value), ", ".join(str(type(v)) for v in value)))
if not isinstance(value, Sequence)
else "{}[{}]".format(type(value), ", ".join(str(type(v)) for v in value))
)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pathlib import Path from pathlib import Path
from typing import Dict from typing import Dict
...@@ -6,18 +19,17 @@ import kaldiio ...@@ -6,18 +19,17 @@ import kaldiio
import numpy import numpy
import soundfile import soundfile
from deepspeech.utils.cli_utils import assert_scipy_wav_style
from deepspeech.io.reader import SoundHDF5File from deepspeech.io.reader import SoundHDF5File
from deepspeech.utils.cli_utils import assert_scipy_wav_style
def file_writer_helper( def file_writer_helper(
wspecifier: str, wspecifier: str,
filetype: str = "mat", filetype: str="mat",
write_num_frames: str = None, write_num_frames: str=None,
compress: bool = False, compress: bool=False,
compression_method: int = 2, compression_method: int=2,
pcm_format: str = "wav", pcm_format: str="wav", ):
):
"""Write matrices in kaldi style """Write matrices in kaldi style
Args: Args:
...@@ -61,20 +73,20 @@ def file_writer_helper( ...@@ -61,20 +73,20 @@ def file_writer_helper(
wspecifier, wspecifier,
write_num_frames=write_num_frames, write_num_frames=write_num_frames,
compress=compress, compress=compress,
compression_method=compression_method, compression_method=compression_method, )
)
elif filetype == "hdf5": elif filetype == "hdf5":
return HDF5Writer( return HDF5Writer(
wspecifier, write_num_frames=write_num_frames, compress=compress wspecifier, write_num_frames=write_num_frames, compress=compress)
)
elif filetype == "sound.hdf5": elif filetype == "sound.hdf5":
return SoundHDF5Writer( return SoundHDF5Writer(
wspecifier, write_num_frames=write_num_frames, pcm_format=pcm_format wspecifier,
) write_num_frames=write_num_frames,
pcm_format=pcm_format)
elif filetype == "sound": elif filetype == "sound":
return SoundWriter( return SoundWriter(
wspecifier, write_num_frames=write_num_frames, pcm_format=pcm_format wspecifier,
) write_num_frames=write_num_frames,
pcm_format=pcm_format)
else: else:
raise NotImplementedError(f"filetype={filetype}") raise NotImplementedError(f"filetype={filetype}")
...@@ -116,29 +128,27 @@ def get_num_frames_writer(write_num_frames: str): ...@@ -116,29 +128,27 @@ def get_num_frames_writer(write_num_frames: str):
""" """
if write_num_frames is not None: if write_num_frames is not None:
if ":" not in write_num_frames: if ":" not in write_num_frames:
raise ValueError( raise ValueError('Must include ":", write_num_frames={}'.format(
'Must include ":", write_num_frames={}'.format(write_num_frames) write_num_frames))
)
nframes_type, nframes_file = write_num_frames.split(":", 1) nframes_type, nframes_file = write_num_frames.split(":", 1)
if nframes_type != "ark,t": if nframes_type != "ark,t":
raise ValueError( raise ValueError("Only supporting text mode. "
"Only supporting text mode. " "e.g. --write-num-frames=ark,t:foo.txt :"
"e.g. --write-num-frames=ark,t:foo.txt :" "{}".format(nframes_type))
"{}".format(nframes_type)
)
return open(nframes_file, "w", encoding="utf-8") return open(nframes_file, "w", encoding="utf-8")
class KaldiWriter(BaseWriter): class KaldiWriter(BaseWriter):
def __init__( def __init__(self,
self, wspecifier, write_num_frames=None, compress=False, compression_method=2 wspecifier,
): write_num_frames=None,
compress=False,
compression_method=2):
if compress: if compress:
self.writer = kaldiio.WriteHelper( self.writer = kaldiio.WriteHelper(
wspecifier, compression_method=compression_method wspecifier, compression_method=compression_method)
)
else: else:
self.writer = kaldiio.WriteHelper(wspecifier) self.writer = kaldiio.WriteHelper(wspecifier)
self.writer_scp = None self.writer_scp = None
...@@ -220,7 +230,8 @@ class SoundHDF5Writer(BaseWriter): ...@@ -220,7 +230,8 @@ class SoundHDF5Writer(BaseWriter):
self.pcm_format = pcm_format self.pcm_format = pcm_format
spec_dict = parse_wspecifier(wspecifier) spec_dict = parse_wspecifier(wspecifier)
self.filename = spec_dict["ark"] self.filename = spec_dict["ark"]
self.writer = SoundHDF5File(spec_dict["ark"], "w", format=self.pcm_format) self.writer = SoundHDF5File(
spec_dict["ark"], "w", format=self.pcm_format)
if "scp" in spec_dict: if "scp" in spec_dict:
self.writer_scp = open(spec_dict["scp"], "w", encoding="utf-8") self.writer_scp = open(spec_dict["scp"], "w", encoding="utf-8")
else: else:
......
#!/usr/bin/env python3 #!/usr/bin/env python3
import argparse import argparse
from distutils.util import strtobool
import logging import logging
from distutils.util import strtobool
import kaldiio import kaldiio
import numpy import numpy
...@@ -16,86 +16,81 @@ from deepspeech.utils.cli_writers import file_writer_helper ...@@ -16,86 +16,81 @@ from deepspeech.utils.cli_writers import file_writer_helper
def get_parser(): def get_parser():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="apply mean-variance normalization to files", description="apply mean-variance normalization to files",
formatter_class=argparse.ArgumentDefaultsHelpFormatter, formatter_class=argparse.ArgumentDefaultsHelpFormatter, )
)
parser.add_argument("--verbose", "-V", default=0, type=int, help="Verbose option") parser.add_argument(
"--verbose", "-V", default=0, type=int, help="Verbose option")
parser.add_argument( parser.add_argument(
"--in-filetype", "--in-filetype",
type=str, type=str,
default="mat", default="mat",
choices=["mat", "hdf5", "sound.hdf5", "sound"], choices=["mat", "hdf5", "sound.hdf5", "sound"],
help="Specify the file format for the rspecifier. " help="Specify the file format for the rspecifier. "
'"mat" is the matrix format in kaldi', '"mat" is the matrix format in kaldi', )
)
parser.add_argument( parser.add_argument(
"--stats-filetype", "--stats-filetype",
type=str, type=str,
default="mat", default="mat",
choices=["mat", "hdf5", "npy"], choices=["mat", "hdf5", "npy"],
help="Specify the file format for the rspecifier. " help="Specify the file format for the rspecifier. "
'"mat" is the matrix format in kaldi', '"mat" is the matrix format in kaldi', )
)
parser.add_argument( parser.add_argument(
"--out-filetype", "--out-filetype",
type=str, type=str,
default="mat", default="mat",
choices=["mat", "hdf5"], choices=["mat", "hdf5"],
help="Specify the file format for the wspecifier. " help="Specify the file format for the wspecifier. "
'"mat" is the matrix format in kaldi', '"mat" is the matrix format in kaldi', )
)
parser.add_argument( parser.add_argument(
"--norm-means", "--norm-means",
type=strtobool, type=strtobool,
default=True, default=True,
help="Do variance normalization or not.", help="Do variance normalization or not.", )
)
parser.add_argument( parser.add_argument(
"--norm-vars", "--norm-vars",
type=strtobool, type=strtobool,
default=False, default=False,
help="Do variance normalization or not.", help="Do variance normalization or not.", )
)
parser.add_argument( parser.add_argument(
"--reverse", type=strtobool, default=False, help="Do reverse mode or not" "--reverse",
) type=strtobool,
default=False,
help="Do reverse mode or not")
parser.add_argument( parser.add_argument(
"--spk2utt", "--spk2utt",
type=str, type=str,
help="A text file of speaker to utterance-list map. " help="A text file of speaker to utterance-list map. "
"(Don't give rspecifier format, such as " "(Don't give rspecifier format, such as "
'"ark:spk2utt")', '"ark:spk2utt")', )
)
parser.add_argument( parser.add_argument(
"--utt2spk", "--utt2spk",
type=str, type=str,
help="A text file of utterance to speaker map. " help="A text file of utterance to speaker map. "
"(Don't give rspecifier format, such as " "(Don't give rspecifier format, such as "
'"ark:utt2spk")', '"ark:utt2spk")', )
)
parser.add_argument( parser.add_argument(
"--write-num-frames", type=str, help="Specify wspecifer for utt2num_frames" "--write-num-frames",
) type=str,
help="Specify wspecifer for utt2num_frames")
parser.add_argument( parser.add_argument(
"--compress", type=strtobool, default=False, help="Save in compressed format" "--compress",
) type=strtobool,
default=False,
help="Save in compressed format")
parser.add_argument( parser.add_argument(
"--compression-method", "--compression-method",
type=int, type=int,
default=2, default=2,
help="Specify the method(if mat) or " "gzip-level(if hdf5)", help="Specify the method(if mat) or "
) "gzip-level(if hdf5)", )
parser.add_argument( parser.add_argument(
"stats_rspecifier_or_rxfilename", "stats_rspecifier_or_rxfilename",
help="Input stats. e.g. ark:stats.ark or stats.mat", help="Input stats. e.g. ark:stats.ark or stats.mat", )
)
parser.add_argument( parser.add_argument(
"rspecifier", type=str, help="Read specifier id. e.g. ark:some.ark" "rspecifier", type=str, help="Read specifier id. e.g. ark:some.ark")
)
parser.add_argument( parser.add_argument(
"wspecifier", type=str, help="Write specifier id. e.g. ark:some.ark" "wspecifier", type=str, help="Write specifier id. e.g. ark:some.ark")
)
return parser return parser
...@@ -118,8 +113,8 @@ def main(): ...@@ -118,8 +113,8 @@ def main():
stats_filetype = args.stats_filetype stats_filetype = args.stats_filetype
stats_dict = dict( stats_dict = dict(
file_reader_helper(args.stats_rspecifier_or_rxfilename, stats_filetype) file_reader_helper(args.stats_rspecifier_or_rxfilename,
) stats_filetype))
else: else:
is_rspcifier = False is_rspcifier = False
if args.stats_filetype == "mat": if args.stats_filetype == "mat":
...@@ -134,16 +129,14 @@ def main(): ...@@ -134,16 +129,14 @@ def main():
norm_vars=args.norm_vars, norm_vars=args.norm_vars,
utt2spk=args.utt2spk, utt2spk=args.utt2spk,
spk2utt=args.spk2utt, spk2utt=args.spk2utt,
reverse=args.reverse, reverse=args.reverse, )
)
with file_writer_helper( with file_writer_helper(
args.wspecifier, args.wspecifier,
filetype=args.out_filetype, filetype=args.out_filetype,
write_num_frames=args.write_num_frames, write_num_frames=args.write_num_frames,
compress=args.compress, compress=args.compress,
compression_method=args.compression_method, compression_method=args.compression_method, ) as writer:
) as writer:
for utt, mat in file_reader_helper(args.rspecifier, args.in_filetype): for utt, mat in file_reader_helper(args.rspecifier, args.in_filetype):
if is_scipy_wav_style(mat): if is_scipy_wav_style(mat):
# If data is sound file, then got as Tuple[int, ndarray] # If data is sound file, then got as Tuple[int, ndarray]
......
#!/usr/bin/env python3 #!/usr/bin/env python3
# encoding: utf-8 # encoding: utf-8
# Copyright 2021 Kyoto University (Hirofumi Inaguma) # Copyright 2021 Kyoto University (Hirofumi Inaguma)
# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
import argparse import argparse
import codecs import codecs
from dateutil import parser
import glob import glob
import os import os
from dateutil import parser
def get_parser(): def get_parser():
parser = argparse.ArgumentParser(description="calculate real time factor (RTF)") parser = argparse.ArgumentParser(
description="calculate real time factor (RTF)")
parser.add_argument( parser.add_argument(
"--log-dir", "--log-dir",
type=str, type=str,
default=None, default=None,
help="path to logging directory", help="path to logging directory", )
)
return parser return parser
...@@ -37,23 +36,21 @@ def main(): ...@@ -37,23 +36,21 @@ def main():
with codecs.open(x, "r", "utf-8") as f: with codecs.open(x, "r", "utf-8") as f:
for line in f: for line in f:
x = line.strip() x = line.strip()
if "INFO: input lengths" in x: # 2021-10-25 08:22:04.052 | INFO | xxx:recog_v2:188 - feat: (1570, 83)
audio_durations += [int(x.split("input lengths: ")[1])] if "feat:" in x:
start_times += [parser.parse(x.split("(")[0])] dur = int(x.split("(")[1].split(',')[0])
elif "INFO: prediction" in x: audio_durations += [dur]
end_times += [parser.parse(x.split("(")[0])] start_times += [parser.parse(x.split("|")[0])]
assert len(audio_durations) == len(end_times), ( elif "total log probability:" in x:
len(audio_durations), end_times += [parser.parse(x.split("|")[0])]
len(end_times), assert len(audio_durations) == len(end_times), (len(audio_durations),
) len(end_times), )
assert len(start_times) == len(end_times), (len(start_times), len(end_times)) assert len(start_times) == len(end_times), (len(start_times),
len(end_times))
audio_sec += sum(audio_durations) / 100 # [sec] audio_sec += sum(audio_durations) / 100 # [sec]
decode_sec += sum( decode_sec += sum([(end - start).total_seconds()
[ for start, end in zip(start_times, end_times)])
(end - start).total_seconds()
for start, end in zip(start_times, end_times)
]
)
n_utt += len(audio_durations) n_utt += len(audio_durations)
print("Total audio duration: %.3f [sec]" % audio_sec) print("Total audio duration: %.3f [sec]" % audio_sec)
......
...@@ -19,44 +19,42 @@ def get_parser(): ...@@ -19,44 +19,42 @@ def get_parser():
"If wspecifier provided: per-utterance by default, " "If wspecifier provided: per-utterance by default, "
"or per-speaker if" "or per-speaker if"
"spk2utt option provided; if wxfilename: global", "spk2utt option provided; if wxfilename: global",
formatter_class=argparse.ArgumentDefaultsHelpFormatter, formatter_class=argparse.ArgumentDefaultsHelpFormatter, )
)
parser.add_argument( parser.add_argument(
"--spk2utt", "--spk2utt",
type=str, type=str,
help="A text file of speaker to utterance-list map. " help="A text file of speaker to utterance-list map. "
"(Don't give rspecifier format, such as " "(Don't give rspecifier format, such as "
'"ark:utt2spk")', '"ark:utt2spk")', )
) parser.add_argument(
parser.add_argument("--verbose", "-V", default=0, type=int, help="Verbose option") "--verbose", "-V", default=0, type=int, help="Verbose option")
parser.add_argument( parser.add_argument(
"--in-filetype", "--in-filetype",
type=str, type=str,
default="mat", default="mat",
choices=["mat", "hdf5", "sound.hdf5", "sound"], choices=["mat", "hdf5", "sound.hdf5", "sound"],
help="Specify the file format for the rspecifier. " help="Specify the file format for the rspecifier. "
'"mat" is the matrix format in kaldi', '"mat" is the matrix format in kaldi', )
)
parser.add_argument( parser.add_argument(
"--out-filetype", "--out-filetype",
type=str, type=str,
default="mat", default="mat",
choices=["mat", "hdf5", "npy"], choices=["mat", "hdf5", "npy"],
help="Specify the file format for the wspecifier. " help="Specify the file format for the wspecifier. "
'"mat" is the matrix format in kaldi', '"mat" is the matrix format in kaldi', )
)
parser.add_argument( parser.add_argument(
"--preprocess-conf", "--preprocess-conf",
type=str, type=str,
default=None, default=None,
help="The configuration file for the pre-processing", help="The configuration file for the pre-processing", )
)
parser.add_argument( parser.add_argument(
"rspecifier", type=str, help="Read specifier for feats. e.g. ark:some.ark" "rspecifier",
) type=str,
help="Read specifier for feats. e.g. ark:some.ark")
parser.add_argument( parser.add_argument(
"wspecifier_or_wxfilename", type=str, help="Write specifier. e.g. ark:some.ark" "wspecifier_or_wxfilename",
) type=str,
help="Write specifier. e.g. ark:some.ark")
return parser return parser
...@@ -92,10 +90,8 @@ def main(): ...@@ -92,10 +90,8 @@ def main():
return x return x
if args.out_filetype == "npy": if args.out_filetype == "npy":
logging.warning( logging.warning("--out-filetype npy is allowed only for "
"--out-filetype npy is allowed only for " "Global CMVN mode, changing to hdf5")
"Global CMVN mode, changing to hdf5"
)
args.out_filetype = "hdf5" args.out_filetype = "hdf5"
else: else:
...@@ -107,10 +103,8 @@ def main(): ...@@ -107,10 +103,8 @@ def main():
return None return None
if args.out_filetype == "hdf5": if args.out_filetype == "hdf5":
logging.warning( logging.warning("--out-filetype hdf5 is not allowed for "
"--out-filetype hdf5 is not allowed for " "Global CMVN mode, changing to npy")
"Global CMVN mode, changing to npy"
)
args.out_filetype = "npy" args.out_filetype = "npy"
if args.preprocess_conf is not None: if args.preprocess_conf is not None:
...@@ -126,8 +120,7 @@ def main(): ...@@ -126,8 +120,7 @@ def main():
idx = 0 idx = 0
for idx, (utt, matrix) in enumerate( for idx, (utt, matrix) in enumerate(
file_reader_helper(args.rspecifier, args.in_filetype), 1 file_reader_helper(args.rspecifier, args.in_filetype), 1):
):
if is_scipy_wav_style(matrix): if is_scipy_wav_style(matrix):
# If data is sound file, then got as Tuple[int, ndarray] # If data is sound file, then got as Tuple[int, ndarray]
rate, matrix = matrix rate, matrix = matrix
...@@ -146,7 +139,7 @@ def main(): ...@@ -146,7 +139,7 @@ def main():
counts[spk] += matrix.shape[0] counts[spk] += matrix.shape[0]
sum_feats[spk] += matrix.sum(axis=0) sum_feats[spk] += matrix.sum(axis=0)
square_sum_feats[spk] += (matrix ** 2).sum(axis=0) square_sum_feats[spk] += (matrix**2).sum(axis=0)
logging.info("Processed {} utterances".format(idx)) logging.info("Processed {} utterances".format(idx))
assert idx > 0, idx assert idx > 0, idx
...@@ -171,8 +164,8 @@ def main(): ...@@ -171,8 +164,8 @@ def main():
# Per utterance or speaker CMVN # Per utterance or speaker CMVN
if is_wspecifier: if is_wspecifier:
with file_writer_helper( with file_writer_helper(
args.wspecifier_or_wxfilename, filetype=args.out_filetype args.wspecifier_or_wxfilename,
) as writer: filetype=args.out_filetype) as writer:
for spk, mat in cmvn_stats.items(): for spk, mat in cmvn_stats.items():
writer[spk] = mat writer[spk] = mat
...@@ -186,8 +179,7 @@ def main(): ...@@ -186,8 +179,7 @@ def main():
kaldiio.save_mat(args.wspecifier_or_wxfilename, matrix) kaldiio.save_mat(args.wspecifier_or_wxfilename, matrix)
else: else:
raise RuntimeError( raise RuntimeError(
"Not supporting: --out-filetype {}".format(args.out_filetype) "Not supporting: --out-filetype {}".format(args.out_filetype))
)
if __name__ == "__main__": if __name__ == "__main__":
......
#!/usr/bin/env python3 #!/usr/bin/env python3
import argparse import argparse
from distutils.util import strtobool
import logging import logging
from distutils.util import strtobool
from deepspeech.transform.transformation import Transformation from deepspeech.transform.transformation import Transformation
from deepspeech.utils.cli_readers import file_reader_helper from deepspeech.utils.cli_readers import file_reader_helper
...@@ -13,50 +13,50 @@ from deepspeech.utils.cli_writers import file_writer_helper ...@@ -13,50 +13,50 @@ from deepspeech.utils.cli_writers import file_writer_helper
def get_parser(): def get_parser():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="copy feature with preprocessing", description="copy feature with preprocessing",
formatter_class=argparse.ArgumentDefaultsHelpFormatter, formatter_class=argparse.ArgumentDefaultsHelpFormatter, )
)
parser.add_argument("--verbose", "-V", default=0, type=int, help="Verbose option") parser.add_argument(
"--verbose", "-V", default=0, type=int, help="Verbose option")
parser.add_argument( parser.add_argument(
"--in-filetype", "--in-filetype",
type=str, type=str,
default="mat", default="mat",
choices=["mat", "hdf5", "sound.hdf5", "sound"], choices=["mat", "hdf5", "sound.hdf5", "sound"],
help="Specify the file format for the rspecifier. " help="Specify the file format for the rspecifier. "
'"mat" is the matrix format in kaldi', '"mat" is the matrix format in kaldi', )
)
parser.add_argument( parser.add_argument(
"--out-filetype", "--out-filetype",
type=str, type=str,
default="mat", default="mat",
choices=["mat", "hdf5", "sound.hdf5", "sound"], choices=["mat", "hdf5", "sound.hdf5", "sound"],
help="Specify the file format for the wspecifier. " help="Specify the file format for the wspecifier. "
'"mat" is the matrix format in kaldi', '"mat" is the matrix format in kaldi', )
)
parser.add_argument( parser.add_argument(
"--write-num-frames", type=str, help="Specify wspecifer for utt2num_frames" "--write-num-frames",
) type=str,
help="Specify wspecifer for utt2num_frames")
parser.add_argument( parser.add_argument(
"--compress", type=strtobool, default=False, help="Save in compressed format" "--compress",
) type=strtobool,
default=False,
help="Save in compressed format")
parser.add_argument( parser.add_argument(
"--compression-method", "--compression-method",
type=int, type=int,
default=2, default=2,
help="Specify the method(if mat) or " "gzip-level(if hdf5)", help="Specify the method(if mat) or "
) "gzip-level(if hdf5)", )
parser.add_argument( parser.add_argument(
"--preprocess-conf", "--preprocess-conf",
type=str, type=str,
default=None, default=None,
help="The configuration file for the pre-processing", help="The configuration file for the pre-processing", )
)
parser.add_argument( parser.add_argument(
"rspecifier", type=str, help="Read specifier for feats. e.g. ark:some.ark" "rspecifier",
) type=str,
help="Read specifier for feats. e.g. ark:some.ark")
parser.add_argument( parser.add_argument(
"wspecifier", type=str, help="Write specifier. e.g. ark:some.ark" "wspecifier", type=str, help="Write specifier. e.g. ark:some.ark")
)
return parser return parser
...@@ -79,12 +79,11 @@ def main(): ...@@ -79,12 +79,11 @@ def main():
preprocessing = None preprocessing = None
with file_writer_helper( with file_writer_helper(
args.wspecifier, args.wspecifier,
filetype=args.out_filetype, filetype=args.out_filetype,
write_num_frames=args.write_num_frames, write_num_frames=args.write_num_frames,
compress=args.compress, compress=args.compress,
compression_method=args.compression_method, compression_method=args.compression_method, ) as writer:
) as writer:
for utt, mat in file_reader_helper(args.rspecifier, args.in_filetype): for utt, mat in file_reader_helper(args.rspecifier, args.in_filetype):
if is_scipy_wav_style(mat): if is_scipy_wav_style(mat):
# If data is sound file, then got as Tuple[int, ndarray] # If data is sound file, then got as Tuple[int, ndarray]
......
#!/usr/bin/env python3 #!/usr/bin/env python3
# encoding: utf-8 # encoding: utf-8
import argparse import argparse
import codecs import codecs
from distutils.util import strtobool
from io import open
import json import json
import logging import logging
import sys import sys
from distutils.util import strtobool
from io import open
from deepspeech.utils.cli_utils import get_commandline_args from deepspeech.utils.cli_utils import get_commandline_args
...@@ -47,45 +45,41 @@ def get_parser(): ...@@ -47,45 +45,41 @@ def get_parser():
"--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape " "--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape "
"--output-scps text:data/text shape:data/utt2text_shape:shape " "--output-scps text:data/text shape:data/utt2text_shape:shape "
"--scps utt2spk:data/utt2spk".format(sys.argv[0]), "--scps utt2spk:data/utt2spk".format(sys.argv[0]),
formatter_class=argparse.ArgumentDefaultsHelpFormatter, formatter_class=argparse.ArgumentDefaultsHelpFormatter, )
)
parser.add_argument( parser.add_argument(
"--input-scps", "--input-scps",
type=str, type=str,
nargs="*", nargs="*",
action="append", action="append",
default=[], default=[],
help="Json files for the inputs", help="Json files for the inputs", )
)
parser.add_argument( parser.add_argument(
"--output-scps", "--output-scps",
type=str, type=str,
nargs="*", nargs="*",
action="append", action="append",
default=[], default=[],
help="Json files for the outputs", help="Json files for the outputs", )
)
parser.add_argument( parser.add_argument(
"--scps", "--scps",
type=str, type=str,
nargs="+", nargs="+",
default=[], default=[],
help="The json files except for the input and outputs", help="The json files except for the input and outputs", )
) parser.add_argument(
parser.add_argument("--verbose", "-V", default=1, type=int, help="Verbose option") "--verbose", "-V", default=1, type=int, help="Verbose option")
parser.add_argument( parser.add_argument(
"--allow-one-column", "--allow-one-column",
type=strtobool, type=strtobool,
default=False, default=False,
help="Allow one column in input scp files. " help="Allow one column in input scp files. "
"In this case, the value will be empty string.", "In this case, the value will be empty string.", )
)
parser.add_argument( parser.add_argument(
"--out", "--out",
"-O", "-O",
type=str, type=str,
help="The output filename. " "If omitted, then output to sys.stdout", help="The output filename. "
) "If omitted, then output to sys.stdout", )
return parser return parser
...@@ -128,37 +122,33 @@ if __name__ == "__main__": ...@@ -128,37 +122,33 @@ if __name__ == "__main__":
# e.g. type_func_str = "int" -> type_func = int # e.g. type_func_str = "int" -> type_func = int
type_func = eval(type_func_str) type_func = eval(type_func_str)
except Exception: except Exception:
raise RuntimeError("Unknown type: {}".format(type_func_str)) raise RuntimeError(
"Unknown type: {}".format(type_func_str))
if not callable(type_func): if not callable(type_func):
raise RuntimeError("Unknown type: {}".format(type_func_str)) raise RuntimeError(
"Unknown type: {}".format(type_func_str))
else: else:
raise RuntimeError( raise RuntimeError(
"Format <key>:<filepath> " "Format <key>:<filepath> "
"or <key>:<filepath>:<type> " "or <key>:<filepath>:<type> "
"e.g. feat:data/feat.scp " "e.g. feat:data/feat.scp "
"or shape:data/feat.scp:shape: {}".format(key_scp) "or shape:data/feat.scp:shape: {}".format(key_scp))
)
for item in lis: for item in lis:
if key == item[0]: if key == item[0]:
raise RuntimeError( raise RuntimeError('The key "{}" is duplicated: {} {}'.
'The key "{}" is duplicated: {} {}'.format( format(key, item[3], key_scp))
key, item[3], key_scp
)
)
lis.append((key, scp, type_func, key_scp, type_func_str)) lis.append((key, scp, type_func, key_scp, type_func_str))
lis_list.append(lis) lis_list.append(lis)
# Open scp files # Open scp files
input_fscps = [ input_fscps = [[open(i[1], "r", encoding="utf-8") for i in il]
[open(i[1], "r", encoding="utf-8") for i in il] for il in input_infos for il in input_infos]
] output_fscps = [[open(i[1], "r", encoding="utf-8") for i in il]
output_fscps = [ for il in output_infos]
[open(i[1], "r", encoding="utf-8") for i in il] for il in output_infos
]
fscps = [[open(i[1], "r", encoding="utf-8") for i in il] for il in infos] fscps = [[open(i[1], "r", encoding="utf-8") for i in il] for il in infos]
# Note(kamo): What is done here? # Note(kamo): What is done here?
...@@ -200,12 +190,10 @@ if __name__ == "__main__": ...@@ -200,12 +190,10 @@ if __name__ == "__main__":
if line == "" or first == "": if line == "" or first == "":
if line != first: if line != first:
concat = sum(input_infos + output_infos + infos, []) concat = sum(input_infos + output_infos + infos, [])
raise RuntimeError( raise RuntimeError("The number of lines mismatch "
"The number of lines mismatch " 'between: "{}" and "{}"'.format(
'between: "{}" and "{}"'.format( concat[0][1],
concat[0][1], concat[count][1] concat[count][1]))
)
)
elif line.split()[0] != first.split()[0]: elif line.split()[0] != first.split()[0]:
concat = sum(input_infos + output_infos + infos, []) concat = sum(input_infos + output_infos + infos, [])
...@@ -216,9 +204,7 @@ if __name__ == "__main__": ...@@ -216,9 +204,7 @@ if __name__ == "__main__":
concat[0][1], concat[0][1],
concat[count][1], concat[count][1],
first.rstrip(), first.rstrip(),
line.rstrip(), line.rstrip(), ))
)
)
count += 1 count += 1
# The end of file # The end of file
...@@ -237,7 +223,8 @@ if __name__ == "__main__": ...@@ -237,7 +223,8 @@ if __name__ == "__main__":
]: ]:
lis = [] lis = []
for idx, (line_list, info_list) in enumerate(zip(_lines, _infos), 1): for idx, (line_list, info_list) in enumerate(
zip(_lines, _infos), 1):
if inout == "input": if inout == "input":
d = {"name": "input{}".format(idx)} d = {"name": "input{}".format(idx)}
elif inout == "output": elif inout == "output":
...@@ -254,9 +241,7 @@ if __name__ == "__main__": ...@@ -254,9 +241,7 @@ if __name__ == "__main__":
raise RuntimeError( raise RuntimeError(
"Format error {}th line in {}: " "Format error {}th line in {}: "
' Expecting "<key> <value>":\n>>> {}'.format( ' Expecting "<key> <value>":\n>>> {}'.format(
nutt, info[1], line nutt, info[1], line))
)
)
uttid = sps[0] uttid = sps[0]
value = "" value = ""
else: else:
...@@ -274,9 +259,7 @@ if __name__ == "__main__": ...@@ -274,9 +259,7 @@ if __name__ == "__main__":
logging.error( logging.error(
'"{}" is an invalid function ' '"{}" is an invalid function '
"for the {} th line in {}: \n>>> {}".format( "for the {} th line in {}: \n>>> {}".format(
info[4], nutt, info[1], line info[4], nutt, info[1], line))
)
)
raise raise
d[key] = value d[key] = value
...@@ -289,8 +272,11 @@ if __name__ == "__main__": ...@@ -289,8 +272,11 @@ if __name__ == "__main__":
entry.update(lis[0]) entry.update(lis[0])
entry = json.dumps( entry = json.dumps(
entry, indent=4, ensure_ascii=False, sort_keys=True, separators=(",", ": ") entry,
) indent=4,
ensure_ascii=False,
sort_keys=True,
separators=(",", ": "))
# Add indent # Add indent
indent = " " * 2 indent = " " * 2
entry = ("\n" + indent).join(entry.split("\n")) entry = ("\n" + indent).join(entry.split("\n"))
......
#!/usr/bin/env python3 #!/usr/bin/env python3
# Copyright 2017 Johns Hopkins University (Shinji Watanabe) # Copyright 2017 Johns Hopkins University (Shinji Watanabe)
# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0) # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
import argparse import argparse
import codecs import codecs
import re import re
...@@ -27,28 +24,26 @@ def exist_or_not(i, match_pos): ...@@ -27,28 +24,26 @@ def exist_or_not(i, match_pos):
def get_parser(): def get_parser():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="convert raw text to tokenized text", description="convert raw text to tokenized text",
formatter_class=argparse.ArgumentDefaultsHelpFormatter, formatter_class=argparse.ArgumentDefaultsHelpFormatter, )
)
parser.add_argument( parser.add_argument(
"--nchar", "--nchar",
"-n", "-n",
default=1, default=1,
type=int, type=int,
help="number of characters to split, i.e., \ help="number of characters to split, i.e., \
aabb -> a a b b with -n 1 and aa bb with -n 2", aabb -> a a b b with -n 1 and aa bb with -n 2", )
)
parser.add_argument( parser.add_argument(
"--skip-ncols", "-s", default=0, type=int, help="skip first n columns" "--skip-ncols", "-s", default=0, type=int, help="skip first n columns")
) parser.add_argument(
parser.add_argument("--space", default="<space>", type=str, help="space symbol") "--space", default="<space>", type=str, help="space symbol")
parser.add_argument( parser.add_argument(
"--non-lang-syms", "--non-lang-syms",
"-l", "-l",
default=None, default=None,
type=str, type=str,
help="list of non-linguistic symobles, e.g., <NOISE> etc.", help="list of non-linguistic symobles, e.g., <NOISE> etc.", )
) parser.add_argument(
parser.add_argument("text", type=str, default=False, nargs="?", help="input text") "text", type=str, default=False, nargs="?", help="input text")
parser.add_argument( parser.add_argument(
"--trans_type", "--trans_type",
"-t", "-t",
...@@ -60,8 +55,7 @@ def get_parser(): ...@@ -60,8 +55,7 @@ def get_parser():
read from SI1279.WRD file -> "bricks are an alternative" read from SI1279.WRD file -> "bricks are an alternative"
Else if trans_type is phn, Else if trans_type is phn,
read from SI1279.PHN file -> "sil b r ih sil k s aa r er n aa l read from SI1279.PHN file -> "sil b r ih sil k s aa r er n aa l
sil t er n ih sil t ih v sil" """, sil t er n ih sil t ih v sil" """, )
)
return parser return parser
...@@ -78,17 +72,17 @@ def main(): ...@@ -78,17 +72,17 @@ def main():
if args.text: if args.text:
f = codecs.open(args.text, encoding="utf-8") f = codecs.open(args.text, encoding="utf-8")
else: else:
f = codecs.getreader("utf-8")(sys.stdin if is_python2 else sys.stdin.buffer) f = codecs.getreader("utf-8")(sys.stdin
if is_python2 else sys.stdin.buffer)
sys.stdout = codecs.getwriter("utf-8")( sys.stdout = codecs.getwriter("utf-8")(sys.stdout
sys.stdout if is_python2 else sys.stdout.buffer if is_python2 else sys.stdout.buffer)
)
line = f.readline() line = f.readline()
n = args.nchar n = args.nchar
while line: while line:
x = line.split() x = line.split()
print(" ".join(x[: args.skip_ncols]), end=" ") print(" ".join(x[:args.skip_ncols]), end=" ")
a = " ".join(x[args.skip_ncols :]) a = " ".join(x[args.skip_ncols:])
# get all matched positions # get all matched positions
match_pos = [] match_pos = []
...@@ -118,7 +112,7 @@ def main(): ...@@ -118,7 +112,7 @@ def main():
i += 1 i += 1
a = chars a = chars
a = [a[j : j + n] for j in range(0, len(a), n)] a = [a[j:j + n] for j in range(0, len(a), n)]
a_flat = [] a_flat = []
for z in a: for z in a:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册