提交 2bdcf2c5 编写于 作者: Hui Zhang

fix for train

上级 3e449d65
......@@ -52,7 +52,7 @@
language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
#exclude: (?=decoders/swig).*(\.cpp|\.h)$
- repo: https://github.com/asottile/reorder_python_imports
rev: v2.4.0
hooks:
- id: reorder-python-imports
#- repo: https://github.com/asottile/reorder_python_imports
# rev: v2.4.0
# hooks:
# - id: reorder-python-imports
......@@ -330,9 +330,10 @@ class AudioSegment(object):
# new_indices = np.linspace(start=0, stop=old_length, num=new_length)
# self._samples = np.interp(new_indices, old_indices, self._samples)
tfm = sox.Transformer()
tfm.set_globals(multithread=False)
tfm.speed(speed_rate)
self._samples = tfm.build_array(
input_array=self._samples, sample_rate_in=self._sample_rate)
input_array=self._samples, sample_rate_in=self._sample_rate).copy()
def normalize(self, target_db=-20, max_gain_db=300.0):
"""Normalize audio to be of the desired RMS value in decibels.
......
......@@ -113,7 +113,7 @@ class AugmentationPipeline():
Args:
spec_segment (np.ndarray): audio feature, (D, T).
"""
for augmentor, rate in zip(self._augmentors, self._rates):
for augmentor, rate in zip(self._spec_augmentors, self._spec_rates):
if self._rng.uniform(0., 1.) < rate:
spec_segment = augmentor.transform_feature(spec_segment)
return spec_segment
......
......@@ -40,7 +40,7 @@ class AugmentorBase():
:param audio_segment: Audio segment to add effects to.
:type audio_segment: AudioSegment|SpeechSegment
"""
pass
raise NotImplementedError
@abstractmethod
def transform_feature(self, spec_segment):
......@@ -52,4 +52,4 @@ class AugmentorBase():
Args:
spec_segment (Spectrogram): Spectrogram segment to add effects to.
"""
pass
raise NotImplementedError
......@@ -133,7 +133,6 @@ class U2BaseModel(nn.Module):
smoothing=lsm_weight,
normalize_length=length_normalized_loss, )
@jit.export
def forward(
self,
speech: paddle.Tensor,
......
......@@ -6,7 +6,7 @@
"max_speed_rate": 1.1,
"num_rates": 3
},
"prob": 1.0
"prob": 0.0
},
{
"type": "shift",
......@@ -29,6 +29,6 @@
"adaptive_size_ratio": 0,
"max_n_time_masks": 20
},
"prob": 0.0
"prob": 1.0
}
]
......@@ -8,11 +8,11 @@ data:
spm_model_prefix: ''
mean_std_filepath: ""
augmentation_config: conf/augmentation.json
batch_size: 16
batch_size: 64
min_input_len: 0.5
max_input_len: 20.0
min_output_len: 0.0
max_output_len: 400
max_output_len: 400.0
min_output_input_ratio: 0.05
max_output_input_ratio: 10.0
raw_wav: True # use raw_wav or kaldi feature
......@@ -75,7 +75,7 @@ model:
training:
n_epoch: 240
accum_grad: 4
accum_grad: 1
global_grad_clip: 5.0
optim: adam
optim_conf:
......@@ -85,7 +85,7 @@ training:
scheduler_conf:
warmup_steps: 25000
lr_decay: 1.0
log_interval: 100
log_interval: 1
decoding:
......
......@@ -6,7 +6,7 @@ if [ $# != 2 ];then
fi
python3 -u ${BIN_DIR}/export.py \
--config conf/deepspeech2.yaml \
--config conf/conformer.yaml \
--checkpoint_path ${1} \
--export_path ${2}
......
......@@ -9,7 +9,7 @@ fi
python3 -u ${BIN_DIR}/test.py \
--device 'gpu' \
--nproc 1 \
--config conf/deepspeech2.yaml \
--config conf/conformer.yaml \
--output ckpt
if [ $? -ne 0 ]; then
......
......@@ -6,7 +6,7 @@ echo "using $ngpu gpus..."
python3 -u ${BIN_DIR}/train.py \
--device 'gpu' \
--nproc ${ngpu} \
--config conf/deepspeech2.yaml \
--config conf/conformer.yaml \
--output ckpt-${1}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册