fix_python3_bug (#3461)

2fa3e51a · 0YuanZhang0 · GitHub · 313d0666 · 2fa3e51a · 2fa3e51a
7 changed files
--- a/PaddleNLP/Research/MRQA2019-D-NET/knowledge_distillation/README.md
+++ b/PaddleNLP/Research/MRQA2019-D-NET/knowledge_distillation/README.md
@@ -9,7 +9,7 @@ Model ensemble can improve the generalization of MRC models. However, such appro
 - Python >= 2.7
 - cuda >= 9.0
 - cudnn >= 7.0
- PaddlePaddle >= 1.5.0 Please refer to Installation Guide [Installation Guide](http://www.paddlepaddle.org/#quick-start)
+- PaddlePaddle >= 1.6. Please refer to the [Installation Guide](http://www.paddlepaddle.org/#quick-start)
 ### Data and Models Preparation
 User can get the data and trained knowledge_distillation models directly we provided: 

--- a/PaddleNLP/Research/MRQA2019-D-NET/knowledge_distillation/model/transformer_encoder.py
+++ b/PaddleNLP/Research/MRQA2019-D-NET/knowledge_distillation/model/transformer_encoder.py
@@ -18,6 +18,7 @@ from __future__ import division
 from __future__ import print_function
 from functools import partial
+from functools import reduce
 import numpy as np
 import paddle.fluid as fluid

--- a/PaddleNLP/Research/MRQA2019-D-NET/multi_task_learning/README.md
+++ b/PaddleNLP/Research/MRQA2019-D-NET/multi_task_learning/README.md
@@ -15,7 +15,7 @@ PALM user guide: [README.md](https://github.com/PaddlePaddle/PALM/blob/master/RE
 - Python >= 2.7
 - cuda >= 9.0
 - cudnn >= 7.0
- PaddlePaddle >= 1.5.0 Please refer to Installation Guide [Installation Guide](http://www.paddlepaddle.org/#quick-start)
+- PaddlePaddle >= 1.6. Please refer to the [Installation Guide](http://www.paddlepaddle.org/#quick-start)
 ### Data Preparation
 #### Get data directly: 

--- a/PaddleNLP/Research/MRQA2019-D-NET/server/README.md
+++ b/PaddleNLP/Research/MRQA2019-D-NET/server/README.md
@@ -12,6 +12,17 @@ bash wget_server_inference_model.sh
 ```
 ## Start server
+The GPU card used by the BERT server or the XLNet server can be selected by setting the environment variable CUDA_VISIBLE_DEVICES:
+```
+export CUDA_VISIBLE_DEVICES=1
+```
+The server ports for the BERT and XLNet models are set in main_server.py, as shown below. If port 5118 or 5120 is already occupied, change it to an idle port.
+```
+url_1 = 'http://127.0.0.1:5118'   # url for model1
+url_2 = 'http://127.0.0.1:5120'   # url for model2
+```
+Start the server:
 ```
 bash start.sh
 ```
--- a/PaddleNLP/Research/MRQA2019-D-NET/server/start.sh
+++ b/PaddleNLP/Research/MRQA2019-D-NET/server/start.sh
 cd bert_server
+export CUDA_VISIBLE_DEVICES=1
 sh start.sh
 cd ../xlnet_server
+export CUDA_VISIBLE_DEVICES=2
 sh serve.sh
 cd ..

--- a/PaddleNLP/Research/MRQA2019-D-NET/server/xlnet_server/modeling.py
+++ b/PaddleNLP/Research/MRQA2019-D-NET/server/xlnet_server/modeling.py
--- a/PaddleNLP/Research/MRQA2019-D-NET/server/xlnet_server/prepro_utils.py
+++ b/PaddleNLP/Research/MRQA2019-D-NET/server/xlnet_server/prepro_utils.py
@@ -12,126 +12,126 @@ SPIECE_UNDERLINE = '▁'
 def printable_text(text):
-  """Returns text encoded in a way suitable for print or `tf.logging`."""
+    """Returns text encoded in a way suitable for print or `tf.logging`."""
-  # These functions want `str` for both Python2 and Python3, but in one case
+    # These functions want `str` for both Python2 and Python3, but in one case
-  # it's a Unicode string and in the other it's a byte string.
+    # it's a Unicode string and in the other it's a byte string.
-  if six.PY3:
+    if six.PY3:
-    if isinstance(text, str):
+        if isinstance(text, str):
-      return text
+            return text
-    elif isinstance(text, bytes):
+        elif isinstance(text, bytes):
-      return text.decode("utf-8", "ignore")
+            return text.decode("utf-8", "ignore")
-    else:
+        else:
-      raise ValueError("Unsupported string type: %s" % (type(text)))
+            raise ValueError("Unsupported string type: %s" % (type(text)))
-  elif six.PY2:
+    elif six.PY2:
-    if isinstance(text, str):
+        if isinstance(text, str):
-      return text
+            return text
-    elif isinstance(text, unicode):
+        elif isinstance(text, unicode):
-      return text.encode("utf-8")
+            return text.encode("utf-8")
+        else:
+            raise ValueError("Unsupported string type: %s" % (type(text)))
    else:
-      raise ValueError("Unsupported string type: %s" % (type(text)))
+        raise ValueError("Not running on Python2 or Python 3?")
-  else:
-    raise ValueError("Not running on Python2 or Python 3?")
 def print_(*args):
-  new_args = []
+    new_args = []
-  for arg in args:
+    for arg in args:
-    if isinstance(arg, list):
+        if isinstance(arg, list):
-      s = [printable_text(i) for i in arg]
+            s = [printable_text(i) for i in arg]
-      s = ' '.join(s)
+            s = ' '.join(s)
-      new_args.append(s)
+            new_args.append(s)
-    else:
+        else:
-      new_args.append(printable_text(arg))
+            new_args.append(printable_text(arg))
-  print(*new_args)
+    print(*new_args)
 def preprocess_text(inputs, lower=False, remove_space=True, keep_accents=False):
-  if remove_space:
+    if remove_space:
-    outputs = ' '.join(inputs.strip().split())
+        outputs = ' '.join(inputs.strip().split())
-  else:
+    else:
-    outputs = inputs
+        outputs = inputs
-  outputs = outputs.replace("``", '"').replace("''", '"')
+    outputs = outputs.replace("``", '"').replace("''", '"')
-  if six.PY2 and isinstance(outputs, str):
+    if six.PY2 and isinstance(outputs, str):
-    outputs = outputs.decode('utf-8')
+        outputs = outputs.decode('utf-8')
-  if not keep_accents:
+    if not keep_accents:
-    outputs = unicodedata.normalize('NFKD', outputs)
+        outputs = unicodedata.normalize('NFKD', outputs)
-    outputs = ''.join([c for c in outputs if not unicodedata.combining(c)])
+        outputs = ''.join([c for c in outputs if not unicodedata.combining(c)])
-  if lower:
+    if lower:
-    outputs = outputs.lower()
+        outputs = outputs.lower()
-  return outputs
+    return outputs
 def encode_pieces(sp_model, text, return_unicode=True, sample=False):
-  # return_unicode is used only for py2
+    # return_unicode is used only for py2
-  # note(zhiliny): in some systems, sentencepiece only accepts str for py2
+    # note(zhiliny): in some systems, sentencepiece only accepts str for py2
-  if six.PY2 and isinstance(text, unicode):
+    if six.PY2 and isinstance(text, unicode):
-    text = text.encode('utf-8')
+        text = text.encode('utf-8')
-  if not sample:
+    if not sample:
-    pieces = sp_model.EncodeAsPieces(text)
+        pieces = sp_model.EncodeAsPieces(text)
-  else:
-    pieces = sp_model.SampleEncodeAsPieces(text, 64, 0.1)
-  new_pieces = []
-  for piece in pieces:
-    if len(piece) > 1 and piece[-1] == ',' and piece[-2].isdigit():
-      cur_pieces = sp_model.EncodeAsPieces(
-          piece[:-1].replace(SPIECE_UNDERLINE, ''))
-      if piece[0] != SPIECE_UNDERLINE and cur_pieces[0][0] == SPIECE_UNDERLINE:
-        if len(cur_pieces[0]) == 1:
-          cur_pieces = cur_pieces[1:]
-        else:
-          cur_pieces[0] = cur_pieces[0][1:]
-      cur_pieces.append(piece[-1])
-      new_pieces.extend(cur_pieces)
    else:
-      new_pieces.append(piece)
+        pieces = sp_model.SampleEncodeAsPieces(text, 64, 0.1)
+    new_pieces = []
+    for piece in pieces:
+        if len(piece) > 1 and piece[-1] == ',' and piece[-2].isdigit():
+            cur_pieces = sp_model.EncodeAsPieces(
+                    piece[:-1].replace(SPIECE_UNDERLINE, ''))
+            if piece[0] != SPIECE_UNDERLINE and cur_pieces[0][0] == SPIECE_UNDERLINE:
+                if len(cur_pieces[0]) == 1:
+                    cur_pieces = cur_pieces[1:]
+                else:
+                    cur_pieces[0] = cur_pieces[0][1:]
+            cur_pieces.append(piece[-1])
+            new_pieces.extend(cur_pieces)
+        else:
+            new_pieces.append(piece)
-  # note(zhiliny): convert back to unicode for py2
+    # note(zhiliny): convert back to unicode for py2
-  if six.PY2 and return_unicode:
+    if six.PY2 and return_unicode:
-    ret_pieces = []
+        ret_pieces = []
-    for piece in new_pieces:
+        for piece in new_pieces:
-      if isinstance(piece, str):
+            if isinstance(piece, str):
-        piece = piece.decode('utf-8')
+                piece = piece.decode('utf-8')
-      ret_pieces.append(piece)
+            ret_pieces.append(piece)
-    new_pieces = ret_pieces
+        new_pieces = ret_pieces
-  return new_pieces
+    return new_pieces
 def encode_ids(sp_model, text, sample=False):
-  pieces = encode_pieces(sp_model, text, return_unicode=False, sample=sample)
+    pieces = encode_pieces(sp_model, text, return_unicode=False, sample=sample)
-  ids = [sp_model.PieceToId(piece) for piece in pieces]
+    ids = [sp_model.PieceToId(piece) for piece in pieces]
-  return ids
+    return ids
 if __name__ == '__main__':
-  import sentencepiece as spm
+    import sentencepiece as spm
-  sp = spm.SentencePieceProcessor()
+    sp = spm.SentencePieceProcessor()
-  sp.load('sp10m.uncased.v3.model')
+    sp.load('sp10m.uncased.v3.model')
-  print_(u'I was born in 2000, and this is falsé.')
+    print_(u'I was born in 2000, and this is falsé.')
-  print_(u'ORIGINAL', sp.EncodeAsPieces(u'I was born in 2000, and this is falsé.'))
+    print_(u'ORIGINAL', sp.EncodeAsPieces(u'I was born in 2000, and this is falsé.'))
-  print_(u'OURS', encode_pieces(sp, u'I was born in 2000, and this is falsé.'))
+    print_(u'OURS', encode_pieces(sp, u'I was born in 2000, and this is falsé.'))
-  print(encode_ids(sp, u'I was born in 2000, and this is falsé.'))
+    print(encode_ids(sp, u'I was born in 2000, and this is falsé.'))
-  print_('')
+    print_('')
-  prepro_func = partial(preprocess_text, lower=True)
+    prepro_func = partial(preprocess_text, lower=True)
-  print_(prepro_func('I was born in 2000, and this is falsé.'))
+    print_(prepro_func('I was born in 2000, and this is falsé.'))
-  print_('ORIGINAL', sp.EncodeAsPieces(prepro_func('I was born in 2000, and this is falsé.')))
+    print_('ORIGINAL', sp.EncodeAsPieces(prepro_func('I was born in 2000, and this is falsé.')))
-  print_('OURS', encode_pieces(sp, prepro_func('I was born in 2000, and this is falsé.')))
+    print_('OURS', encode_pieces(sp, prepro_func('I was born in 2000, and this is falsé.')))
-  print(encode_ids(sp, prepro_func('I was born in 2000, and this is falsé.')))
+    print(encode_ids(sp, prepro_func('I was born in 2000, and this is falsé.')))
-  print_('')
+    print_('')
-  print_('I was born in 2000, and this is falsé.')
+    print_('I was born in 2000, and this is falsé.')
-  print_('ORIGINAL', sp.EncodeAsPieces('I was born in 2000, and this is falsé.'))
+    print_('ORIGINAL', sp.EncodeAsPieces('I was born in 2000, and this is falsé.'))
-  print_('OURS', encode_pieces(sp, 'I was born in 2000, and this is falsé.'))
+    print_('OURS', encode_pieces(sp, 'I was born in 2000, and this is falsé.'))
-  print(encode_ids(sp, 'I was born in 2000, and this is falsé.'))
+    print(encode_ids(sp, 'I was born in 2000, and this is falsé.'))
-  print_('')
+    print_('')
-  print_('I was born in 92000, and this is falsé.')
+    print_('I was born in 92000, and this is falsé.')
-  print_('ORIGINAL', sp.EncodeAsPieces('I was born in 92000, and this is falsé.'))
+    print_('ORIGINAL', sp.EncodeAsPieces('I was born in 92000, and this is falsé.'))
-  print_('OURS', encode_pieces(sp, 'I was born in 92000, and this is falsé.'))
+    print_('OURS', encode_pieces(sp, 'I was born in 92000, and this is falsé.'))
-  print(encode_ids(sp, 'I was born in 92000, and this is falsé.'))
+    print(encode_ids(sp, 'I was born in 92000, and this is falsé.'))