diff --git a/deep_speech_2/cloud/pcloud_submit.sh b/deep_speech_2/cloud/pcloud_submit.sh index a7fb42cbc4fd69d2861bff4be5a6351ff0e69b58..3c9a1c2603cc7232640e5cd48f11e113b999a0fb 100644 --- a/deep_speech_2/cloud/pcloud_submit.sh +++ b/deep_speech_2/cloud/pcloud_submit.sh @@ -1,6 +1,6 @@ TRAIN_MANIFEST="cloud/cloud.manifest.train" DEV_MANIFEST="cloud/cloud.manifest.dev" -CLOUD_MODEL_DIR="/pfs/dlnel/home/USERNAME/deepspeech2/model" +CLOUD_MODEL_DIR="./checkpoints" BATCH_SIZE=256 NUM_GPU=8 NUM_NODE=1 @@ -11,7 +11,7 @@ DS2_PATH=${PWD%/*} cp -f pcloud_train.sh ${DS2_PATH} paddlecloud submit \ --image bootstrapper:5000/wanghaoshuang/pcloud_ds2:latest \ +-image bootstrapper:5000/paddlepaddle/pcloud_ds2:latest \ -jobname ${JOB_NAME} \ -cpu ${NUM_GPU} \ -gpu ${NUM_GPU} \ diff --git a/deep_speech_2/data_utils/data.py b/deep_speech_2/data_utils/data.py index 98180b4be73bffa8f2a752af10b53b33fc83bbb0..33fcadc7bb756cc41aaf62a4d47b5f19ebdc7923 100644 --- a/deep_speech_2/data_utils/data.py +++ b/deep_speech_2/data_utils/data.py @@ -85,9 +85,9 @@ class DataGenerator(object): self._rng = random.Random(random_seed) self._epoch = 0 # for caching tar files info - self.local_data = local() - self.local_data.tar2info = {} - self.local_data.tar2object = {} + self._local_data = local() + self._local_data.tar2info = {} + self._local_data.tar2object = {} def process_utterance(self, filename, transcript): """Load, augment, featurize and normalize for speech data. @@ -240,16 +240,16 @@ class DataGenerator(object): """ if file.startswith('tar:'): tarpath, filename = file.split(':', 1)[1].split('#', 1) - if 'tar2info' not in self.local_data.__dict__: - self.local_data.tar2info = {} - if 'tar2object' not in self.local_data.__dict__: - self.local_data.tar2object = {} - if tarpath not in self.local_data.tar2info: + if 'tar2info' not in self._local_data.__dict__: + self._local_data.tar2info = {} + if 'tar2object' not in self._local_data.__dict__: + self._local_data.tar2object = {} + if tarpath not in self._local_data.tar2info: object, infoes = self._parse_tar(tarpath) - self.local_data.tar2info[tarpath] = infoes - self.local_data.tar2object[tarpath] = object - return self.local_data.tar2object[tarpath].extractfile( - self.local_data.tar2info[tarpath][filename]) + self._local_data.tar2info[tarpath] = infoes + self._local_data.tar2object[tarpath] = object + return self._local_data.tar2object[tarpath].extractfile( + self._local_data.tar2info[tarpath][filename]) else: return open(file, 'r') diff --git a/deep_speech_2/data_utils/featurizer/audio_featurizer.py b/deep_speech_2/data_utils/featurizer/audio_featurizer.py index f0d223cfbe8bbae039de84fbbffcf0cd3975b790..39f453017e9a62d2740ee5e2d70cf3facfb7e040 100644 --- a/deep_speech_2/data_utils/featurizer/audio_featurizer.py +++ b/deep_speech_2/data_utils/featurizer/audio_featurizer.py @@ -57,7 +57,7 @@ class AudioFeaturizer(object): def featurize(self, audio_segment, allow_downsampling=True, - allow_upsamplling=True): + allow_upsampling=True): """Extract audio features from AudioSegment or SpeechSegment. :param audio_segment: Audio/speech segment to extract features from. diff --git a/deep_speech_2/layer.py b/deep_speech_2/layer.py index 8fec0eea39daf23f2495416ec46de8aef864b184..a91f694b8e92ff3e3b48e569ecc0a7751d26bee2 100644 --- a/deep_speech_2/layer.py +++ b/deep_speech_2/layer.py @@ -55,16 +55,20 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, act): :rtype: LayerOutput """ # input-hidden weights shared across bi-direcitonal rnn. - input_proj = paddle.layer.fc( + input_proj_forward = paddle.layer.fc( + input=input, size=size, act=paddle.activation.Linear(), bias_attr=False) + input_proj_backward = paddle.layer.fc( input=input, size=size, act=paddle.activation.Linear(), bias_attr=False) # batch norm is only performed on input-state projection - input_proj_bn = paddle.layer.batch_norm( - input=input_proj, act=paddle.activation.Linear()) + input_proj_bn_forward = paddle.layer.batch_norm( + input=input_proj_forward, act=paddle.activation.Linear()) + input_proj_bn_backward = paddle.layer.batch_norm( + input=input_proj_backward, act=paddle.activation.Linear()) # forward and backward in time forward_simple_rnn = paddle.layer.recurrent( - input=input_proj_bn, act=act, reverse=False) + input=input_proj_bn_forward, act=act, reverse=False) backward_simple_rnn = paddle.layer.recurrent( - input=input_proj_bn, act=act, reverse=True) + input=input_proj_bn_backward, act=act, reverse=True) return paddle.layer.concat(input=[forward_simple_rnn, backward_simple_rnn]) diff --git a/ltr/metrics.py b/ltr/metrics.py index f9f9277c6f761129aa11d53200b6fbc1de9657c2..12a77434bf0f90f87f5754d1dbef4dc4435cba21 100644 --- a/ltr/metrics.py +++ b/ltr/metrics.py @@ -19,7 +19,7 @@ def ndcg(score_list): n = len(score_list) cost = .0 for i in range(n): - cost += float(score_list[i]) / np.log((i + 1) + 1) + cost += float(np.power(2, score_list[i])) / np.log((i + 1) + 1) return cost dcg_cost = dcg(score_list) @@ -28,14 +28,11 @@ def ndcg(score_list): return dcg_cost / ideal_cost -class NdcgTest(unittest.TestCase): - def __init__(self): - pass - - def runcase(self): +class TestNDCG(unittest.TestCase): + def test_array(self): a = [3, 2, 3, 0, 1, 2] value = ndcg(a) - self.assertAlmostEqual(0.961, value, places=3) + self.assertAlmostEqual(0.9583, value, places=3) if __name__ == '__main__':