未验证 提交 2fa3e51a 编写于 作者: 0 0YuanZhang0 提交者: GitHub

fix_python3_bug (#3461)

上级 313d0666
...@@ -9,7 +9,7 @@ Model ensemble can improve the generalization of MRC models. However, such appro ...@@ -9,7 +9,7 @@ Model ensemble can improve the generalization of MRC models. However, such appro
- Python >= 2.7 - Python >= 2.7
- cuda >= 9.0 - cuda >= 9.0
- cudnn >= 7.0 - cudnn >= 7.0
- PaddlePaddle >= 1.5.0 Please refer to Installation Guide [Installation Guide](http://www.paddlepaddle.org/#quick-start) - PaddlePaddle >= 1.6 Please refer to Installation Guide [Installation Guide](http://www.paddlepaddle.org/#quick-start)
### Data and Models Preparation ### Data and Models Preparation
Users can directly download the data and the trained knowledge_distillation models that we provide: Users can directly download the data and the trained knowledge_distillation models that we provide:
......
...@@ -18,6 +18,7 @@ from __future__ import division ...@@ -18,6 +18,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from functools import partial from functools import partial
from functools import reduce
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
......
...@@ -15,7 +15,7 @@ PALM user guide: [README.md](https://github.com/PaddlePaddle/PALM/blob/master/RE ...@@ -15,7 +15,7 @@ PALM user guide: [README.md](https://github.com/PaddlePaddle/PALM/blob/master/RE
- Python >= 2.7 - Python >= 2.7
- cuda >= 9.0 - cuda >= 9.0
- cudnn >= 7.0 - cudnn >= 7.0
- PaddlePaddle >= 1.5.0 Please refer to Installation Guide [Installation Guide](http://www.paddlepaddle.org/#quick-start) - PaddlePaddle >= 1.6 Please refer to Installation Guide [Installation Guide](http://www.paddlepaddle.org/#quick-start)
### Data Preparation ### Data Preparation
#### Get data directly: #### Get data directly:
......
...@@ -12,6 +12,17 @@ bash wget_server_inference_model.sh ...@@ -12,6 +12,17 @@ bash wget_server_inference_model.sh
``` ```
## Start server ## Start server
The GPU card used by the BERT server or the XLNet server can be selected by setting the CUDA_VISIBLE_DEVICES environment variable:
```
export CUDA_VISIBLE_DEVICES=1
```
The server ports for the BERT and XLNet models are set in main_server.py, as shown below. If port 5118 or 5120 is already in use, change it to a free port.
```
url_1 = 'http://127.0.0.1:5118' # url for model1
url_2 = 'http://127.0.0.1:5120' # url for model2
```
Start the server:
``` ```
bash start.sh bash start.sh
``` ```
cd bert_server cd bert_server
export CUDA_VISIBLE_DEVICES=1
sh start.sh sh start.sh
cd ../xlnet_server cd ../xlnet_server
export CUDA_VISIBLE_DEVICES=2
sh serve.sh sh serve.sh
cd .. cd ..
......
...@@ -12,126 +12,126 @@ SPIECE_UNDERLINE = '▁' ...@@ -12,126 +12,126 @@ SPIECE_UNDERLINE = '▁'
def printable_text(text): def printable_text(text):
"""Returns text encoded in a way suitable for print or `tf.logging`.""" """Returns text encoded in a way suitable for print or `tf.logging`."""
# These functions want `str` for both Python2 and Python3, but in one case # These functions want `str` for both Python2 and Python3, but in one case
# it's a Unicode string and in the other it's a byte string. # it's a Unicode string and in the other it's a byte string.
if six.PY3: if six.PY3:
if isinstance(text, str): if isinstance(text, str):
return text return text
elif isinstance(text, bytes): elif isinstance(text, bytes):
return text.decode("utf-8", "ignore") return text.decode("utf-8", "ignore")
else: else:
raise ValueError("Unsupported string type: %s" % (type(text))) raise ValueError("Unsupported string type: %s" % (type(text)))
elif six.PY2: elif six.PY2:
if isinstance(text, str): if isinstance(text, str):
return text return text
elif isinstance(text, unicode): elif isinstance(text, unicode):
return text.encode("utf-8") return text.encode("utf-8")
else:
raise ValueError("Unsupported string type: %s" % (type(text)))
else: else:
raise ValueError("Unsupported string type: %s" % (type(text))) raise ValueError("Not running on Python2 or Python 3?")
else:
raise ValueError("Not running on Python2 or Python 3?")
def print_(*args): def print_(*args):
new_args = [] new_args = []
for arg in args: for arg in args:
if isinstance(arg, list): if isinstance(arg, list):
s = [printable_text(i) for i in arg] s = [printable_text(i) for i in arg]
s = ' '.join(s) s = ' '.join(s)
new_args.append(s) new_args.append(s)
else: else:
new_args.append(printable_text(arg)) new_args.append(printable_text(arg))
print(*new_args) print(*new_args)
def preprocess_text(inputs, lower=False, remove_space=True, keep_accents=False): def preprocess_text(inputs, lower=False, remove_space=True, keep_accents=False):
if remove_space: if remove_space:
outputs = ' '.join(inputs.strip().split()) outputs = ' '.join(inputs.strip().split())
else: else:
outputs = inputs outputs = inputs
outputs = outputs.replace("``", '"').replace("''", '"') outputs = outputs.replace("``", '"').replace("''", '"')
if six.PY2 and isinstance(outputs, str): if six.PY2 and isinstance(outputs, str):
outputs = outputs.decode('utf-8') outputs = outputs.decode('utf-8')
if not keep_accents: if not keep_accents:
outputs = unicodedata.normalize('NFKD', outputs) outputs = unicodedata.normalize('NFKD', outputs)
outputs = ''.join([c for c in outputs if not unicodedata.combining(c)]) outputs = ''.join([c for c in outputs if not unicodedata.combining(c)])
if lower: if lower:
outputs = outputs.lower() outputs = outputs.lower()
return outputs return outputs
def encode_pieces(sp_model, text, return_unicode=True, sample=False): def encode_pieces(sp_model, text, return_unicode=True, sample=False):
# return_unicode is used only for py2 # return_unicode is used only for py2
# note(zhiliny): in some systems, sentencepiece only accepts str for py2 # note(zhiliny): in some systems, sentencepiece only accepts str for py2
if six.PY2 and isinstance(text, unicode): if six.PY2 and isinstance(text, unicode):
text = text.encode('utf-8') text = text.encode('utf-8')
if not sample: if not sample:
pieces = sp_model.EncodeAsPieces(text) pieces = sp_model.EncodeAsPieces(text)
else:
pieces = sp_model.SampleEncodeAsPieces(text, 64, 0.1)
new_pieces = []
for piece in pieces:
if len(piece) > 1 and piece[-1] == ',' and piece[-2].isdigit():
cur_pieces = sp_model.EncodeAsPieces(
piece[:-1].replace(SPIECE_UNDERLINE, ''))
if piece[0] != SPIECE_UNDERLINE and cur_pieces[0][0] == SPIECE_UNDERLINE:
if len(cur_pieces[0]) == 1:
cur_pieces = cur_pieces[1:]
else:
cur_pieces[0] = cur_pieces[0][1:]
cur_pieces.append(piece[-1])
new_pieces.extend(cur_pieces)
else: else:
new_pieces.append(piece) pieces = sp_model.SampleEncodeAsPieces(text, 64, 0.1)
new_pieces = []
for piece in pieces:
if len(piece) > 1 and piece[-1] == ',' and piece[-2].isdigit():
cur_pieces = sp_model.EncodeAsPieces(
piece[:-1].replace(SPIECE_UNDERLINE, ''))
if piece[0] != SPIECE_UNDERLINE and cur_pieces[0][0] == SPIECE_UNDERLINE:
if len(cur_pieces[0]) == 1:
cur_pieces = cur_pieces[1:]
else:
cur_pieces[0] = cur_pieces[0][1:]
cur_pieces.append(piece[-1])
new_pieces.extend(cur_pieces)
else:
new_pieces.append(piece)
# note(zhiliny): convert back to unicode for py2 # note(zhiliny): convert back to unicode for py2
if six.PY2 and return_unicode: if six.PY2 and return_unicode:
ret_pieces = [] ret_pieces = []
for piece in new_pieces: for piece in new_pieces:
if isinstance(piece, str): if isinstance(piece, str):
piece = piece.decode('utf-8') piece = piece.decode('utf-8')
ret_pieces.append(piece) ret_pieces.append(piece)
new_pieces = ret_pieces new_pieces = ret_pieces
return new_pieces return new_pieces
def encode_ids(sp_model, text, sample=False): def encode_ids(sp_model, text, sample=False):
pieces = encode_pieces(sp_model, text, return_unicode=False, sample=sample) pieces = encode_pieces(sp_model, text, return_unicode=False, sample=sample)
ids = [sp_model.PieceToId(piece) for piece in pieces] ids = [sp_model.PieceToId(piece) for piece in pieces]
return ids return ids
if __name__ == '__main__': if __name__ == '__main__':
import sentencepiece as spm import sentencepiece as spm
sp = spm.SentencePieceProcessor() sp = spm.SentencePieceProcessor()
sp.load('sp10m.uncased.v3.model') sp.load('sp10m.uncased.v3.model')
print_(u'I was born in 2000, and this is falsé.') print_(u'I was born in 2000, and this is falsé.')
print_(u'ORIGINAL', sp.EncodeAsPieces(u'I was born in 2000, and this is falsé.')) print_(u'ORIGINAL', sp.EncodeAsPieces(u'I was born in 2000, and this is falsé.'))
print_(u'OURS', encode_pieces(sp, u'I was born in 2000, and this is falsé.')) print_(u'OURS', encode_pieces(sp, u'I was born in 2000, and this is falsé.'))
print(encode_ids(sp, u'I was born in 2000, and this is falsé.')) print(encode_ids(sp, u'I was born in 2000, and this is falsé.'))
print_('') print_('')
prepro_func = partial(preprocess_text, lower=True) prepro_func = partial(preprocess_text, lower=True)
print_(prepro_func('I was born in 2000, and this is falsé.')) print_(prepro_func('I was born in 2000, and this is falsé.'))
print_('ORIGINAL', sp.EncodeAsPieces(prepro_func('I was born in 2000, and this is falsé.'))) print_('ORIGINAL', sp.EncodeAsPieces(prepro_func('I was born in 2000, and this is falsé.')))
print_('OURS', encode_pieces(sp, prepro_func('I was born in 2000, and this is falsé.'))) print_('OURS', encode_pieces(sp, prepro_func('I was born in 2000, and this is falsé.')))
print(encode_ids(sp, prepro_func('I was born in 2000, and this is falsé.'))) print(encode_ids(sp, prepro_func('I was born in 2000, and this is falsé.')))
print_('') print_('')
print_('I was born in 2000, and this is falsé.') print_('I was born in 2000, and this is falsé.')
print_('ORIGINAL', sp.EncodeAsPieces('I was born in 2000, and this is falsé.')) print_('ORIGINAL', sp.EncodeAsPieces('I was born in 2000, and this is falsé.'))
print_('OURS', encode_pieces(sp, 'I was born in 2000, and this is falsé.')) print_('OURS', encode_pieces(sp, 'I was born in 2000, and this is falsé.'))
print(encode_ids(sp, 'I was born in 2000, and this is falsé.')) print(encode_ids(sp, 'I was born in 2000, and this is falsé.'))
print_('') print_('')
print_('I was born in 92000, and this is falsé.') print_('I was born in 92000, and this is falsé.')
print_('ORIGINAL', sp.EncodeAsPieces('I was born in 92000, and this is falsé.')) print_('ORIGINAL', sp.EncodeAsPieces('I was born in 92000, and this is falsé.'))
print_('OURS', encode_pieces(sp, 'I was born in 92000, and this is falsé.')) print_('OURS', encode_pieces(sp, 'I was born in 92000, and this is falsé.'))
print(encode_ids(sp, 'I was born in 92000, and this is falsé.')) print(encode_ids(sp, 'I was born in 92000, and this is falsé.'))
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册