Unverified commit 2fa3e51a authored by 0YuanZhang0, committed by GitHub

fix_python3_bug (#3461)

Parent 313d0666
@@ -9,7 +9,7 @@ Model ensemble can improve the generalization of MRC models. However, such appro
- Python >= 2.7
- cuda >= 9.0
- cudnn >= 7.0
-- PaddlePaddle >= 1.5.0 Please refer to Installation Guide [Installation Guide](http://www.paddlepaddle.org/#quick-start)
+- PaddlePaddle >= 1.6 Please refer to the [Installation Guide](http://www.paddlepaddle.org/#quick-start)
### Data and Models Preparation
Users can directly get the data and the trained knowledge_distillation models we provide:
...
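The requirement above is bumped from PaddlePaddle 1.5.0 to 1.6 in this commit, so it may help to confirm the installed version before running anything. A minimal, generic check, not part of the repo:
```
import paddle

# this commit raises the documented requirement to PaddlePaddle >= 1.6
major, minor = (int(p) for p in paddle.__version__.split(".")[:2])
print("PaddlePaddle", paddle.__version__)
if (major, minor) < (1, 6):
    raise RuntimeError("please upgrade PaddlePaddle to 1.6 or newer")
```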
@@ -18,6 +18,7 @@ from __future__ import division
from __future__ import print_function
from functools import partial
+from functools import reduce
import numpy as np
import paddle.fluid as fluid
...
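The added import above is the heart of the Python 3 fix: `reduce` was a builtin in Python 2 but lives only in `functools` on Python 3, so any fold over a sequence (for example collapsing a shape into an element count) needs the explicit import. A small generic illustration, not taken from the repo:
```
from functools import reduce  # builtin in Python 2, import-only in Python 3

shape = [8, 128, 768]
numel = reduce(lambda a, b: a * b, shape, 1)  # multiply out the shape
print(numel)  # 786432
```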
@@ -15,7 +15,7 @@ PALM user guide: [README.md](https://github.com/PaddlePaddle/PALM/blob/master/RE
- Python >= 2.7
- cuda >= 9.0
- cudnn >= 7.0
-- PaddlePaddle >= 1.5.0 Please refer to Installation Guide [Installation Guide](http://www.paddlepaddle.org/#quick-start)
+- PaddlePaddle >= 1.6 Please refer to the [Installation Guide](http://www.paddlepaddle.org/#quick-start)
### Data Preparation
#### Get data directly:
...
@@ -12,6 +12,17 @@ bash wget_server_inference_model.sh
```
## Start server
+We can choose the GPU card for the bert server or the xlnet server by setting the CUDA_VISIBLE_DEVICES variable:
+```
+export CUDA_VISIBLE_DEVICES=1
+```
+In the main_server.py file we set the server ports for the bert and xlnet models, as shown below. If port 5118 or 5120 is occupied, please choose an idle port instead.
+```
+url_1 = 'http://127.0.0.1:5118' # url for model1
+url_2 = 'http://127.0.0.1:5120' # url for model2
+```
+Start the server:
```
bash start.sh
```
cd bert_server
+export CUDA_VISIBLE_DEVICES=1
sh start.sh
cd ../xlnet_server
+export CUDA_VISIBLE_DEVICES=2
sh serve.sh
cd ..
...
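If you are not sure whether 5118 or 5120 is free before editing main_server.py, a small standard-library check like the one below can tell you. This is a generic sketch, not a script shipped with the repo:
```
import socket

def port_in_use(port, host="127.0.0.1"):
    # True if something is already listening on host:port
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        return s.connect_ex((host, port)) == 0
    finally:
        s.close()

for port in (5118, 5120):
    print(port, "occupied" if port_in_use(port) else "free")
```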
@@ -80,6 +80,7 @@ def head_projection(h, d_model, n_head, d_head, param_initializer, name=''):
    head = fluid.layers.mul(x=h, y=proj_weight, x_num_col_dims=2, y_num_col_dims=1)
    return head
def post_attention(h, attn_vec, d_model, n_head, d_head, dropout,
                   param_initializer, residual=True, name=''):
    """Post-attention processing."""
@@ -113,6 +114,7 @@ def post_attention(h, attn_vec, d_model, n_head, d_head, dropout,
    return output
def abs_attn_core(q_head, k_head, v_head, attn_mask, dropatt, scale):
    """Core absolute positional attention operations."""
@@ -132,6 +134,7 @@ def abs_attn_core(q_head, k_head, v_head, attn_mask, dropatt, scale):
    return attn_vec
def rel_attn_core(q_head, k_head_h, v_head_h, k_head_r, seg_embed, seg_mat,
                  r_w_bias, r_r_bias, r_s_bias, attn_mask, dropatt,
                  scale):
@@ -180,6 +183,7 @@ def rel_attn_core(q_head, k_head_h, v_head_h, k_head_r, seg_embed, seg_mat,
    attn_vec = einsum4x4('ijbn,jbnd->ibnd', attn_prob, v_head_h)
    return attn_vec
def rel_shift(x, klen=-1):
    """perform relative shift to form the relative attention score."""
    x_size = x.shape
@@ -207,6 +211,7 @@ def _cache_mem(curr_out, prev_mem, mem_len, reuse_len=None):
    new_mem.stop_gradient = True
    return new_mem
def relative_positional_encoding(qlen, klen, d_model, clamp_len, attn_type,
                                 bi_data, bsz=None, dtype=None):
    """create relative positional encoding."""
@@ -254,6 +259,7 @@ def relative_positional_encoding(qlen, klen, d_model, clamp_len, attn_type,
    fluid.layers.reshape(pos_emb, [2*qlen, -1, d_model], inplace=True)
    return pos_emb
def rel_multihead_attn(h, r, r_w_bias, r_r_bias, seg_mat, r_s_bias, seg_embed,
                       attn_mask, mems, d_model, n_head, d_head, dropout,
                       dropatt, initializer, name=''):
@@ -548,6 +554,7 @@ def transformer_xl(inp_k, n_token, n_layer, d_model, n_head,
        new_mems = None
    return output, new_mems, lookup_table
def lm_loss(hidden, target, n_token, d_model, initializer, lookup_table=None,
            tie_weight=False, bi_data=True):
@@ -626,6 +633,7 @@ def summarize_sequence(summary_type, hidden, d_model, n_head, d_head, dropout,
    return summary
def classification_loss(hidden, labels, n_class, initializer, name, reuse=None,
                        return_logits=False):
    """
@@ -641,7 +649,7 @@ def classification_loss(hidden, labels, n_class, initializer, name, reuse=None,
        param_attr=fluid.ParamAttr(name=name+'_logits', initializer=initializer))
    one_hot_target = fluid.layers.one_hot(labels, depth=n_class, dtype=hidden.dtype)
-    loss = -fuid.layers.reduce_sum(fluid.layers.log_softmax(logits) * one_hot_target, -1)
+    loss = -fluid.layers.reduce_sum(fluid.layers.log_softmax(logits) * one_hot_target, -1)
    if return_logits:
        return loss, logits
...
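For readers skimming the hunks above, `rel_shift` is the Transformer-XL trick that realigns raw relative-position scores so that each query row lines up with its own keys. Below is a rough NumPy sketch of the idea for illustration only; it is my own analogue, not the Paddle implementation being patched:
```
import numpy as np

def rel_shift_np(x, klen):
    # x: [qlen, rlen, ...] raw attention scores over relative positions
    qlen, rlen = x.shape[0], x.shape[1]
    x = x.reshape((rlen, qlen) + x.shape[2:])  # swap the two leading dims via reshape
    x = x[1:]                                  # drop the first row
    x = x.reshape((qlen, rlen - 1) + x.shape[2:])
    return x[:, :klen]                         # keep scores for the klen key positions

scores = np.arange(4 * 7, dtype="float32").reshape(4, 7)  # qlen=4, rlen=7
print(rel_shift_np(scores, klen=4).shape)  # (4, 4)
```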