Unverified commit 2fa3e51a authored by 0YuanZhang0, committed by GitHub

fix_python3_bug (#3461)

Parent 313d0666
......@@ -9,7 +9,7 @@ Model ensemble can improve the generalization of MRC models. However, such appro
- Python >= 2.7
- cuda >= 9.0
- cudnn >= 7.0
- PaddlePaddle >= 1.5.0. Please refer to the [Installation Guide](http://www.paddlepaddle.org/#quick-start)
- PaddlePaddle >= 1.6. Please refer to the [Installation Guide](http://www.paddlepaddle.org/#quick-start)
### Data and Models Preparation
Users can directly download the data and trained knowledge_distillation models we provide:
......
......@@ -18,6 +18,7 @@ from __future__ import division
from __future__ import print_function
from functools import partial
from functools import reduce
import numpy as np
import paddle.fluid as fluid
......
......@@ -15,7 +15,7 @@ PALM user guide: [README.md](https://github.com/PaddlePaddle/PALM/blob/master/RE
- Python >= 2.7
- cuda >= 9.0
- cudnn >= 7.0
- PaddlePaddle >= 1.5.0. Please refer to the [Installation Guide](http://www.paddlepaddle.org/#quick-start)
- PaddlePaddle >= 1.6. Please refer to the [Installation Guide](http://www.paddlepaddle.org/#quick-start)
### Data Preparation
#### Get data directly:
......
......@@ -12,6 +12,17 @@ bash wget_server_inference_model.sh
```
## Start server
We can select the GPU card for the bert server or the xlnet server by setting the CUDA_VISIBLE_DEVICES environment variable:
```
export CUDA_VISIBLE_DEVICES=1
```
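The same GPU selection can also be done from Python before paddle is imported; a minimal sketch of the equivalent usage (an assumption for illustration, not code from this repo):
```
import os

# Must be set before paddle.fluid initializes CUDA; equivalent to `export CUDA_VISIBLE_DEVICES=1`.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import paddle.fluid as fluid  # the server process will now only see GPU 1
```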
In the main_server.py file we set the server ports for the bert and xlnet models, as shown below. If port 5118 or 5120 is occupied, please choose an idle port instead.
```
url_1 = 'http://127.0.0.1:5118' # url for model1
url_2 = 'http://127.0.0.1:5120' # url for model2
```
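If you are not sure whether these ports are free, a quick check can be done with Python's standard socket module; a minimal localhost-only sketch for illustration:
```
import socket

def port_in_use(port, host="127.0.0.1"):
    # connect_ex returns 0 when something is already listening on the port
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        return s.connect_ex((host, port)) == 0
    finally:
        s.close()

for port in (5118, 5120):
    if port_in_use(port):
        print("port %d is occupied, choose an idle one in main_server.py" % port)
```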
Start the server:
```
bash start.sh
```
cd bert_server
export CUDA_VISIBLE_DEVICES=1
sh start.sh
cd ../xlnet_server
export CUDA_VISIBLE_DEVICES=2
sh serve.sh
cd ..
......
......@@ -80,6 +80,7 @@ def head_projection(h, d_model, n_head, d_head, param_initializer, name=''):
head = fluid.layers.mul(x=h, y=proj_weight, x_num_col_dims=2, y_num_col_dims=1)
return head
def post_attention(h, attn_vec, d_model, n_head, d_head, dropout,
param_initializer, residual=True, name=''):
"""Post-attention processing."""
......@@ -113,6 +114,7 @@ def post_attention(h, attn_vec, d_model, n_head, d_head, dropout,
return output
def abs_attn_core(q_head, k_head, v_head, attn_mask, dropatt, scale):
"""Core absolute positional attention operations."""
......@@ -132,6 +134,7 @@ def abs_attn_core(q_head, k_head, v_head, attn_mask, dropatt, scale):
return attn_vec
def rel_attn_core(q_head, k_head_h, v_head_h, k_head_r, seg_embed, seg_mat,
r_w_bias, r_r_bias, r_s_bias, attn_mask, dropatt,
scale):
......@@ -180,6 +183,7 @@ def rel_attn_core(q_head, k_head_h, v_head_h, k_head_r, seg_embed, seg_mat,
attn_vec = einsum4x4('ijbn,jbnd->ibnd', attn_prob, v_head_h)
return attn_vec
def rel_shift(x, klen=-1):
"""perform relative shift to form the relative attention score."""
x_size = x.shape
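For intuition, the relative shift used in Transformer-XL/XLNet can be reproduced with a small NumPy sketch; this illustrates the general reshape-and-slice trick, not the exact fluid implementation above:
```
import numpy as np

def rel_shift_np(x, klen=-1):
    # x: attention scores of shape [qlen, rlen, bsz, n_head], rlen = number of relative positions
    qlen, rlen = x.shape[0], x.shape[1]
    x = x.reshape((rlen, qlen) + x.shape[2:])    # reinterpret memory with the first two axes swapped
    x = x[1:]                                    # drop one row to realign the relative positions
    x = x.reshape((qlen, rlen - 1) + x.shape[2:])
    return x[:, :klen] if klen > 0 else x        # keep only the klen valid key positions
```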
......@@ -207,6 +211,7 @@ def _cache_mem(curr_out, prev_mem, mem_len, reuse_len=None):
new_mem.stop_gradient = True
return new_mem
def relative_positional_encoding(qlen, klen, d_model, clamp_len, attn_type,
bi_data, bsz=None, dtype=None):
"""create relative positional encoding."""
......@@ -254,6 +259,7 @@ def relative_positional_encoding(qlen, klen, d_model, clamp_len, attn_type,
fluid.layers.reshape(pos_emb, [2*qlen, -1, d_model], inplace=True)
return pos_emb
def rel_multihead_attn(h, r, r_w_bias, r_r_bias, seg_mat, r_s_bias, seg_embed,
attn_mask, mems, d_model, n_head, d_head, dropout,
dropatt, initializer, name=''):
......@@ -548,6 +554,7 @@ def transformer_xl(inp_k, n_token, n_layer, d_model, n_head,
new_mems = None
return output, new_mems, lookup_table
def lm_loss(hidden, target, n_token, d_model, initializer, lookup_table=None,
tie_weight=False, bi_data=True):
......@@ -626,6 +633,7 @@ def summarize_sequence(summary_type, hidden, d_model, n_head, d_head, dropout,
return summary
def classification_loss(hidden, labels, n_class, initializer, name, reuse=None,
return_logits=False):
"""
......@@ -641,7 +649,7 @@ def classification_loss(hidden, labels, n_class, initializer, name, reuse=None,
param_attr=fluid.ParamAttr(name=name+'_logits', initializer=initializer))
one_hot_target = fluid.layers.one_hot(labels, depth=n_class, dtype=hidden.dtype)
loss = -fuid.layers.reduce_sum(fluid.layers.log_softmax(logits) * one_hot_target, -1)
loss = -fluid.layers.reduce_sum(fluid.layers.log_softmax(logits) * one_hot_target, -1)
if return_logits:
return loss, logits
......
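As a note on the fix above: the corrected line is a standard softmax cross-entropy over the class logits. A minimal NumPy sketch of the same computation (illustration only, not the Paddle API):
```
import numpy as np

def log_softmax(x, axis=-1):
    x = x - x.max(axis=axis, keepdims=True)
    return x - np.log(np.exp(x).sum(axis=axis, keepdims=True))

logits = np.array([[2.0, 0.5, -1.0]])              # [batch, n_class]
labels = np.array([0])                             # gold class ids
one_hot_target = np.eye(logits.shape[-1])[labels]  # [batch, n_class]
loss = -(log_softmax(logits) * one_hot_target).sum(-1)
print(loss)                                        # per-example cross-entropy loss
```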