Unverified commit 2fa3e51a authored by 0YuanZhang0, committed by GitHub

fix_python3_bug (#3461)

Parent 313d0666
@@ -9,7 +9,7 @@ Model ensemble can improve the generalization of MRC models. However, such appro
- Python >= 2.7
- cuda >= 9.0
- cudnn >= 7.0
-- PaddlePaddle >= 1.5.0 Please refer to Installation Guide [Installation Guide](http://www.paddlepaddle.org/#quick-start)
+- PaddlePaddle >= 1.6 Please refer to the [Installation Guide](http://www.paddlepaddle.org/#quick-start)
### Data and Models Preparation
Users can directly get the data and the trained knowledge_distillation models we provide:
...
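The requirement above is bumped from PaddlePaddle 1.5.0 to 1.6 in this commit, so it may help to confirm the installed version before running anything. A minimal, generic check, not part of the repo:
```
import paddle

# this commit raises the documented requirement to PaddlePaddle >= 1.6
major, minor = (int(p) for p in paddle.__version__.split(".")[:2])
print("PaddlePaddle", paddle.__version__)
if (major, minor) < (1, 6):
    raise RuntimeError("please upgrade PaddlePaddle to 1.6 or newer")
```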
@@ -18,6 +18,7 @@ from __future__ import division
from __future__ import print_function
from functools import partial
+from functools import reduce
import numpy as np
import paddle.fluid as fluid
...
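The added import above is the heart of the Python 3 fix: `reduce` was a builtin in Python 2 but lives only in `functools` on Python 3, so any fold over a sequence (for example collapsing a shape into an element count) needs the explicit import. A small generic illustration, not taken from the repo:
```
from functools import reduce  # builtin in Python 2, import-only in Python 3

shape = [8, 128, 768]
numel = reduce(lambda a, b: a * b, shape, 1)  # multiply out the shape
print(numel)  # 786432
```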
@@ -15,7 +15,7 @@ PALM user guide: [README.md](https://github.com/PaddlePaddle/PALM/blob/master/RE
- Python >= 2.7
- cuda >= 9.0
- cudnn >= 7.0
-- PaddlePaddle >= 1.5.0 Please refer to Installation Guide [Installation Guide](http://www.paddlepaddle.org/#quick-start)
+- PaddlePaddle >= 1.6 Please refer to the [Installation Guide](http://www.paddlepaddle.org/#quick-start)
### Data Preparation
#### Get data directly:
...
@@ -12,6 +12,17 @@ bash wget_server_inference_model.sh
```
## Start server
+We can choose the GPU card for the bert server or the xlnet server by setting the CUDA_VISIBLE_DEVICES variable:
+```
+export CUDA_VISIBLE_DEVICES=1
+```
+In the main_server.py file we set the server ports for the bert and xlnet models, as shown below. If port 5118 or 5120 is occupied, please choose an idle port instead.
+```
+url_1 = 'http://127.0.0.1:5118' # url for model1
+url_2 = 'http://127.0.0.1:5120' # url for model2
+```
+Start the server:
```
bash start.sh
```
cd bert_server
+export CUDA_VISIBLE_DEVICES=1
sh start.sh
cd ../xlnet_server
+export CUDA_VISIBLE_DEVICES=2
sh serve.sh
cd ..
...
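If you are not sure whether 5118 or 5120 is free before editing main_server.py, a small standard-library check like the one below can tell you. This is a generic sketch, not a script shipped with the repo:
```
import socket

def port_in_use(port, host="127.0.0.1"):
    # True if something is already listening on host:port
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        return s.connect_ex((host, port)) == 0
    finally:
        s.close()

for port in (5118, 5120):
    print(port, "occupied" if port_in_use(port) else "free")
```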
@@ -80,6 +80,7 @@ def head_projection(h, d_model, n_head, d_head, param_initializer, name=''):
    head = fluid.layers.mul(x=h, y=proj_weight, x_num_col_dims=2, y_num_col_dims=1)
    return head
def post_attention(h, attn_vec, d_model, n_head, d_head, dropout,
                   param_initializer, residual=True, name=''):
    """Post-attention processing."""
@@ -113,6 +114,7 @@ def post_attention(h, attn_vec, d_model, n_head, d_head, dropout,
    return output
def abs_attn_core(q_head, k_head, v_head, attn_mask, dropatt, scale):
    """Core absolute positional attention operations."""
@@ -132,6 +134,7 @@ def abs_attn_core(q_head, k_head, v_head, attn_mask, dropatt, scale):
    return attn_vec
def rel_attn_core(q_head, k_head_h, v_head_h, k_head_r, seg_embed, seg_mat,
                  r_w_bias, r_r_bias, r_s_bias, attn_mask, dropatt,
                  scale):
@@ -180,6 +183,7 @@ def rel_attn_core(q_head, k_head_h, v_head_h, k_head_r, seg_embed, seg_mat,
    attn_vec = einsum4x4('ijbn,jbnd->ibnd', attn_prob, v_head_h)
    return attn_vec
def rel_shift(x, klen=-1):
    """perform relative shift to form the relative attention score."""
    x_size = x.shape
@@ -207,6 +211,7 @@ def _cache_mem(curr_out, prev_mem, mem_len, reuse_len=None):
    new_mem.stop_gradient = True
    return new_mem
def relative_positional_encoding(qlen, klen, d_model, clamp_len, attn_type,
                                 bi_data, bsz=None, dtype=None):
    """create relative positional encoding."""
@@ -254,6 +259,7 @@ def relative_positional_encoding(qlen, klen, d_model, clamp_len, attn_type,
    fluid.layers.reshape(pos_emb, [2*qlen, -1, d_model], inplace=True)
    return pos_emb
def rel_multihead_attn(h, r, r_w_bias, r_r_bias, seg_mat, r_s_bias, seg_embed,
                       attn_mask, mems, d_model, n_head, d_head, dropout,
                       dropatt, initializer, name=''):
@@ -548,6 +554,7 @@ def transformer_xl(inp_k, n_token, n_layer, d_model, n_head,
        new_mems = None
    return output, new_mems, lookup_table
def lm_loss(hidden, target, n_token, d_model, initializer, lookup_table=None,
            tie_weight=False, bi_data=True):
@@ -626,6 +633,7 @@ def summarize_sequence(summary_type, hidden, d_model, n_head, d_head, dropout,
    return summary
def classification_loss(hidden, labels, n_class, initializer, name, reuse=None,
                        return_logits=False):
    """
@@ -641,7 +649,7 @@ def classification_loss(hidden, labels, n_class, initializer, name, reuse=None,
        param_attr=fluid.ParamAttr(name=name+'_logits', initializer=initializer))
    one_hot_target = fluid.layers.one_hot(labels, depth=n_class, dtype=hidden.dtype)
-    loss = -fuid.layers.reduce_sum(fluid.layers.log_softmax(logits) * one_hot_target, -1)
+    loss = -fluid.layers.reduce_sum(fluid.layers.log_softmax(logits) * one_hot_target, -1)
    if return_logits:
        return loss, logits
...
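For readers skimming the hunks above, `rel_shift` is the Transformer-XL trick that realigns raw relative-position scores so that each query row lines up with its own keys. Below is a rough NumPy sketch of the idea for illustration only; it is my own analogue, not the Paddle implementation being patched:
```
import numpy as np

def rel_shift_np(x, klen):
    # x: [qlen, rlen, ...] raw attention scores over relative positions
    qlen, rlen = x.shape[0], x.shape[1]
    x = x.reshape((rlen, qlen) + x.shape[2:])  # swap the two leading dims via reshape
    x = x[1:]                                  # drop the first row
    x = x.reshape((qlen, rlen - 1) + x.shape[2:])
    return x[:, :klen]                         # keep scores for the klen key positions

scores = np.arange(4 * 7, dtype="float32").reshape(4, 7)  # qlen=4, rlen=7
print(rel_shift_np(scores, klen=4).shape)  # (4, 4)
```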