VarBase.__getitem__ work for np.int64, np.longlong; but __setitem_varbase__...

VarBase.__getitem__ work for np.int64, np.longlong; but __setitem_varbase__ not support paddle.int16/set_value op not support

VarBase.__getitem__ work for np.int64, np.longlong; but __setitem_varbase__...
VarBase.__getitem__ work for np.int64, np.longlong; but __setitem_varbase__ not support paddle.int16/set_value op not support
231499a3 · Hui Zhang · c7508229 · 231499a3 · 231499a3 · 231499a3
4 changed files
--- a/deepspeech/exps/u2/model.py
+++ b/deepspeech/exps/u2/model.py
@@ -568,26 +568,25 @@ class U2Tester(U2Trainer):
                ctc_probs = ctc_probs.squeeze(0)
                target = target.squeeze(0)
                alignment = ctc_utils.forced_align(ctc_probs, target)
-                logger.info("align ids", key[0], alignment)
+                logger.info(f"align ids: {key[0]} {alignment}")
                fout.write('{} {}\n'.format(key[0], alignment))
                # 3. gen praat
                # segment alignment
                align_segs = text_grid.segment_alignment(alignment)
-                logger.info("align tokens", key[0], align_segs)
+                logger.info(f"align tokens: {key[0]}, {align_segs}")
                # IntervalTier, List["start end token\n"]
                subsample = utility.get_subsample(self.config)
                tierformat = text_grid.align_to_tierformat(
                    align_segs, subsample, token_dict)
                # write tier
-                align_output_path = os.path.join(
+                align_output_path = Path(self.args.result_file).parent / "align"
-                    os.path.dirname(self.args.result_file), "align")
+                align_output_path.mkdir(parents=True, exist_ok=True)
-                tier_path = os.path.join(align_output_path, key[0] + ".tier")
+                tier_path = align_output_path / (key[0] + ".tier")
-                with open(tier_path, 'w') as f:
+                with tier_path.open('w') as f:
                    f.writelines(tierformat)
                # write textgrid
-                textgrid_path = os.path.join(align_output_path,
+                textgrid_path = align_output_path / (key[0] + ".TextGrid")
-                                             key[0] + ".TextGrid")
                second_per_frame = 1. / (1000. /
                                         stride_ms)  # 25ms window, 10ms stride
                second_per_example = (
@@ -595,7 +594,7 @@ class U2Tester(U2Trainer):
                text_grid.generate_textgrid(
                    maxtime=second_per_example,
                    intervals=tierformat,
-                    output=textgrid_path)
+                    output=str(textgrid_path))
    def run_align(self):
        self.resume_or_scratch()

--- a/deepspeech/exps/u2_kaldi/model.py
+++ b/deepspeech/exps/u2_kaldi/model.py
@@ -545,9 +545,8 @@ class U2Tester(U2Trainer):
        self.model.eval()
        logger.info(f"Align Total Examples: {len(self.align_loader.dataset)}")
-        stride_ms = self.config.collater.stride_ms
+        stride_ms = self.align_loader.collate_fn.stride_ms
-        token_dict = self.args.char_list
+        token_dict = self.align_loader.collate_fn.vocab_list
        with open(self.args.result_file, 'w') as fout:
            # one example in batch
            for i, batch in enumerate(self.align_loader):
@@ -564,26 +563,25 @@ class U2Tester(U2Trainer):
                ctc_probs = ctc_probs.squeeze(0)
                target = target.squeeze(0)
                alignment = ctc_utils.forced_align(ctc_probs, target)
-                logger.info("align ids", key[0], alignment)
+                logger.info(f"align ids: {key[0]} {alignment}")
                fout.write('{} {}\n'.format(key[0], alignment))
                # 3. gen praat
                # segment alignment
                align_segs = text_grid.segment_alignment(alignment)
-                logger.info("align tokens", key[0], align_segs)
+                logger.info(f"align tokens: {key[0]}, {align_segs}")
                # IntervalTier, List["start end token\n"]
                subsample = utility.get_subsample(self.config)
                tierformat = text_grid.align_to_tierformat(
                    align_segs, subsample, token_dict)
                # write tier
-                align_output_path = os.path.join(
+                align_output_path = Path(self.args.result_file).parent / "align"
-                    os.path.dirname(self.args.result_file), "align")
+                align_output_path.mkdir(parents=True, exist_ok=True)
-                tier_path = os.path.join(align_output_path, key[0] + ".tier")
+                tier_path = align_output_path / (key[0] + ".tier")
-                with open(tier_path, 'w') as f:
+                with tier_path.open('w') as f:
                    f.writelines(tierformat)
                # write textgrid
-                textgrid_path = os.path.join(align_output_path,
+                textgrid_path = align_output_path / (key[0] + ".TextGrid")
-                                             key[0] + ".TextGrid")
                second_per_frame = 1. / (1000. /
                                         stride_ms)  # 25ms window, 10ms stride
                second_per_example = (
@@ -591,7 +589,7 @@ class U2Tester(U2Trainer):
                text_grid.generate_textgrid(
                    maxtime=second_per_example,
                    intervals=tierformat,
-                    output=textgrid_path)
+                    output=str(textgrid_path))
    def run_align(self):
        self.resume_or_scratch()

--- a/deepspeech/exps/u2_st/model.py
+++ b/deepspeech/exps/u2_st/model.py
@@ -595,26 +595,25 @@ class U2STTester(U2STTrainer):
                ctc_probs = ctc_probs.squeeze(0)
                target = target.squeeze(0)
                alignment = ctc_utils.forced_align(ctc_probs, target)
-                logger.info("align ids", key[0], alignment)
+                logger.info(f"align ids: {key[0]} {alignment}")
                fout.write('{} {}\n'.format(key[0], alignment))
                # 3. gen praat
                # segment alignment
                align_segs = text_grid.segment_alignment(alignment)
-                logger.info("align tokens", key[0], align_segs)
+                logger.info(f"align tokens: {key[0]}, {align_segs}")
                # IntervalTier, List["start end token\n"]
                subsample = utility.get_subsample(self.config)
                tierformat = text_grid.align_to_tierformat(
                    align_segs, subsample, token_dict)
                # write tier
-                align_output_path = os.path.join(
+                align_output_path = Path(self.args.result_file).parent / "align"
-                    os.path.dirname(self.args.result_file), "align")
+                align_output_path.mkdir(parents=True, exist_ok=True)
-                tier_path = os.path.join(align_output_path, key[0] + ".tier")
+                tier_path = align_output_path / (key[0] + ".tier")
-                with open(tier_path, 'w') as f:
+                with tier_path.open('w') as f:
                    f.writelines(tierformat)
                # write textgrid
-                textgrid_path = os.path.join(align_output_path,
+                textgrid_path = align_output_path / (key[0] + ".TextGrid")
-                                             key[0] + ".TextGrid")
                second_per_frame = 1. / (1000. /
                                         stride_ms)  # 25ms window, 10ms stride
                second_per_example = (
@@ -622,7 +621,7 @@ class U2STTester(U2STTrainer):
                text_grid.generate_textgrid(
                    maxtime=second_per_example,
                    intervals=tierformat,
-                    output=textgrid_path)
+                    output=str(textgrid_path))
    def run_align(self):
        self.resume_or_scratch()

--- a/deepspeech/utils/ctc_utils.py
+++ b/deepspeech/utils/ctc_utils.py
@@ -86,8 +86,10 @@ def forced_align(ctc_probs: paddle.Tensor, y: paddle.Tensor,
    log_alpha = paddle.zeros(
        (ctc_probs.size(0), len(y_insert_blank)))  #(T, 2L+1)
    log_alpha = log_alpha - float('inf')  # log of zero
+    # self.__setitem_varbase__(item, value) When assign a value to a paddle.Tensor, the data type of the paddle.Tensor not support int16
    state_path = (paddle.zeros(
-        (ctc_probs.size(0), len(y_insert_blank)), dtype=paddle.int16) - 1
+        (ctc_probs.size(0), len(y_insert_blank)), dtype=paddle.int32) - 1
                  )  # state path, Tuple((T, 2L+1))
    # init start state
@@ -111,8 +113,8 @@ def forced_align(ctc_probs: paddle.Tensor, y: paddle.Tensor,
            log_alpha[t, s] = paddle.max(candidates) + ctc_probs[t][
                y_insert_blank[s]]
            state_path[t, s] = prev_state[paddle.argmax(candidates)]
+    # self.__setitem_varbase__(item, value) When assign a value to a paddle.Tensor, the data type of the paddle.Tensor not support int16
-    state_seq = -1 * paddle.ones((ctc_probs.size(0), 1), dtype=paddle.int16)
+    state_seq = -1 * paddle.ones((ctc_probs.size(0), 1), dtype=paddle.int32)
    candidates = paddle.to_tensor([
        log_alpha[-1, len(y_insert_blank) - 1],  # Sb