Unverified commit 752e71a8, authored by Xiaoyao Xi, committed by GitHub

Merge pull request #14 from xixiaoyao/master

fix bugs
......@@ -2,5 +2,8 @@
__pycache__
pretrain_model
output_model
build
dist
paddle_palm.egg-info
mrqa_output
*.log
task_instance: "mrqa"
target_tag: 1
mix_ratio: 1.0
save_path: "output_model/firstrun"
backbone: "ernie"
backbone_config_path: "pretrain_model/ernie/ernie_config.json"
backbone: "bert"
backbone_config_path: "pretrain_model/bert/bert_config.json"
vocab_path: "pretrain_model/ernie/vocab.txt"
do_lower_case: True
max_seq_len: 512
batch_size: 5
num_epochs: 3
batch_size: 4
num_epochs: 2
optimizer: "adam"
learning_rate: 3e-5
warmup_proportion: 0.1
......
task_instance: "mrqa, mlm4mrqa, match4mrqa"
target_tag: 1, 0, 0
mix_ratio: 0.5, 1.0, 0.5
task_instance: "mrqa, match4mrqa"
target_tag: 1, 0
mix_ratio: 0.5, 0.5
save_path: "output_model/secondrun"
......@@ -11,8 +11,8 @@ vocab_path: "pretrain_model/ernie/vocab.txt"
do_lower_case: True
max_seq_len: 512
batch_size: 5
num_epochs: 5
batch_size: 4
num_epochs: 2
optimizer: "adam"
learning_rate: 3e-5
warmup_proportion: 0.1
......
task_instance: "mlm4mrqa"
save_path: "output_model/firstrun"
backbone: "ernie"
backbone_config_path: "pretrain_model/ernie/ernie_config.json"
vocab_path: "pretrain_model/ernie/vocab.txt"
do_lower_case: True
max_seq_len: 512
batch_size: 5
num_epochs: 100
optimizer: "adam"
learning_rate: 3e-5
warmup_proportion: 0.1
weight_decay: 0.1
print_every_n_steps: 1
This diff is collapsed.
The source diff is too large to display; you can view the blob instead.
This diff is collapsed.
......@@ -2,9 +2,6 @@ import paddlepalm as palm
if __name__ == '__main__':
controller = palm.Controller('config_demo1.yaml', task_dir='demo1_tasks')
controller.load_pretrain('pretrain_model/ernie/params')
controller.load_pretrain('pretrain_model/bert/params')
controller.train()
controller = palm.Controller(config='config_demo1.yaml', task_dir='demo1_tasks', for_train=False)
controller.pred('mrqa', inference_model_dir='output_model/firstrun/infer_model')
train_file: data/mrqa/mrqa-combined.train.raw.json
pred_file: data/mrqa/mrqa-combined.dev.raw.json
pred_output_path: 'mrqa_output'
reader: mrc4ernie
train_file: data/mrqa/train.json
reader: mrc
paradigm: mrc
vocab_path: "pretrain_model/bert/vocab.txt"
do_lower_case: True
max_seq_len: 512
doc_stride: 128
max_query_len: 64
max_answer_len: 30
n_best_size: 20
null_score_diff_threshold: 0.0
verbose: False
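As an aside, a minimal sketch of how these keys usually interact in BERT-style MRC readers (this is an assumption about the reader's convention, not something stated in the diff): a passage longer than the sequence budget is split into overlapping windows whose start offsets advance by doc_stride tokens.
# Hypothetical illustration only; the constants mirror the config values above.
max_tokens_for_doc = 512 - 64 - 3      # max_seq_len minus max_query_len and [CLS]/[SEP]/[SEP] (assumed budget)
doc_len = 900                          # example passage length in tokens
starts, offset = [], 0
while offset < doc_len:
    starts.append(offset)
    if offset + max_tokens_for_doc >= doc_len:
        break
    offset += 128                      # doc_stride
# starts == [0, 128, 256, 384, 512]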
......@@ -3,8 +3,9 @@ import paddlepalm as palm
if __name__ == '__main__':
controller = palm.Controller('config_demo2.yaml', task_dir='demo2_tasks')
controller.load_pretrain('pretrain_model/ernie/params')
# controller.train()
controller.train()
# controller = palm.Controller(config='config_demo2.yaml', task_dir='demo2_tasks', for_train=False)
# controller.pred('mrqa', inference_model_dir='output_model/secondrun/infer_model')
train_file: "data/match4mrqa/train.txt"
reader: match4ernie
train_file: "data/match4mrqa/train.tsv"
reader: match
paradigm: match
train_file: "data/mlm4mrqa/train.txt"
train_file: "data/mlm4mrqa/train.tsv"
reader: mlm
paradigm: mlm
train_file: data/mrqa/mrqa-combined.train.raw.json
pred_file: data/mrqa/mrqa-combined.dev.raw.json
train_file: data/mrqa/train.json
pred_file: data/mrqa/dev.json
pred_output_path: 'mrqa_output'
reader: mrc4ernie
reader: mrc
paradigm: mrc
doc_stride: 128
max_query_len: 64
......
import paddlepalm as palm
if __name__ == '__main__':
controller = palm.Controller('config_demo3.yaml', task_dir='demo3_tasks')
controller.load_pretrain('pretrain_model/ernie/params')
controller.train()
# controller = palm.Controller(config='config_demo3.yaml', task_dir='demo3_tasks', for_train=False)
# controller.pred('cls4mrqa', inference_model_dir='output_model/thirdrun/infer_model')
train_file: "data/mlm4mrqa/train.tsv"
reader: mlm
paradigm: mlm
train_file: data/cls4mrqa/train.tsv
reader: cls
paradigm: cls
......@@ -41,11 +41,9 @@ class Model(backbone):
self._prepostprocess_dropout = config["hidden_dropout_prob"]
self._attention_dropout = config["attention_probs_dropout_prob"]
self.model_name = model_name
self._word_emb_name = self.model_name + "word_embedding"
self._pos_emb_name = self.model_name + "pos_embedding"
self._sent_emb_name = self.model_name + "sent_embedding"
self._word_emb_name = "word_embedding"
self._pos_emb_name = "pos_embedding"
self._sent_emb_name = "sent_embedding"
# Initialize all weights by truncated normal initializer, and all biases
# will be initialized by constant zero by default.
......@@ -62,90 +60,91 @@ class Model(backbone):
@property
def outputs_attr(self):
return {"word_embedding": [[-1, -1, self._emb_size], 'float32'],
"embedding_table": [[-1, self._voc_size, self._emb_size], 'float32'],
"encoder_outputs": [[-1, -1, self._emb_size], 'float32'],
"sentence_embedding": [[-1, self._emb_size], 'float32'],
"sentence_pair_embedding": [[-1, self._emb_size], 'float32']}
def build(self, inputs):
def build(self, inputs, scope_name=""):
src_ids = inputs['token_ids']
pos_ids = inputs['position_ids']
sent_ids = inputs['segment_ids']
input_mask = inputs['input_mask']
self._emb_dtype = 'float32'
# padding id in vocabulary must be set to 0
emb_out = layers.embedding(
emb_out = fluid.layers.embedding(
input=src_ids,
size=[self._voc_size, self._emb_size],
dtype="float32",
dtype=self._emb_dtype,
param_attr=fluid.ParamAttr(
name=self._word_emb_name, initializer=self._param_initializer),
name=scope_name+self._word_emb_name, initializer=self._param_initializer),
is_sparse=False)
# fluid.global_scope().find_var('backbone-word_embedding').get_tensor()
embedding_table = fluid.default_main_program().global_block().var(scope_name+self._word_emb_name)
self.emb_out = emb_out
position_emb_out = layers.embedding(
position_emb_out = fluid.layers.embedding(
input=pos_ids,
size=[self._max_position_seq_len, self._emb_size],
dtype="float32",
dtype=self._emb_dtype,
param_attr=fluid.ParamAttr(
name=self._pos_emb_name, initializer=self._param_initializer))
self.position_emb_out = position_emb_out
name=scope_name+self._pos_emb_name, initializer=self._param_initializer))
sent_emb_out = layers.embedding(
sent_emb_out = fluid.layers.embedding(
sent_ids,
size=[self._sent_types, self._emb_size],
dtype="float32"
dtype=self._emb_dtype,
param_attr=fluid.ParamAttr(
name=self._sent_emb_name, initializer=self._param_initializer))
self.sent_emb_out = sent_emb_out
name=scope_name+self._sent_emb_name, initializer=self._param_initializer))
emb_out = emb_out + position_emb_out + sent_emb_out
emb_out = emb_out + position_emb_out
emb_out = emb_out + sent_emb_out
emb_out = pre_process_layer(
emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder')
emb_out, 'nd', self._prepostprocess_dropout, name=scope_name+'pre_encoder')
self_attn_mask = layers.matmul(
x = input_mask, y = input_mask, transpose_y = True)
self_attn_mask = fluid.layers.matmul(
x=input_mask, y=input_mask, transpose_y=True)
self_attn_mask = layers.scale(
x = self_attn_mask, scale = 10000.0, bias = -1.0, bias_after_scale = False)
n_head_self_attn_mask = layers.stack(
self_attn_mask = fluid.layers.scale(
x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False)
n_head_self_attn_mask = fluid.layers.stack(
x=[self_attn_mask] * self._n_head, axis=1)
n_head_self_attn_mask.stop_gradient = True
enc_out = encoder(
enc_input = emb_out,
attn_bias = n_head_self_attn_mask,
n_layer = self._n_layer,
n_head = self._n_head,
d_key = self._emb_size // self._n_head,
d_value = self._emb_size // self._n_head,
d_model = self._emb_size,
d_inner_hid = self._emb_size * 4,
prepostprocess_dropout = self._prepostprocess_dropout,
attention_dropout = self._attention_dropout,
relu_dropout = 0,
hidden_act = self._hidden_act,
preprocess_cmd = "",
postprocess_cmd = "dan",
param_initializer = self._param_initializer,
name = self.model_name + 'encoder')
next_sent_feat = layers.slice(
input = enc_out, axes = [1], starts = [0], ends = [1])
next_sent_feat = layers.fc(
input = next_sent_feat,
size = self._emb_size,
act = "tanh",
param_attr = fluid.ParamAttr(
name = self.model_name + "pooled_fc.w_0",
initializer = self._param_initializer),
bias_attr = "pooled_fc.b_0")
return {'word_embedding': emb_out,
enc_input=emb_out,
attn_bias=n_head_self_attn_mask,
n_layer=self._n_layer,
n_head=self._n_head,
d_key=self._emb_size // self._n_head,
d_value=self._emb_size // self._n_head,
d_model=self._emb_size,
d_inner_hid=self._emb_size * 4,
prepostprocess_dropout=self._prepostprocess_dropout,
attention_dropout=self._attention_dropout,
relu_dropout=0,
hidden_act=self._hidden_act,
preprocess_cmd="",
postprocess_cmd="dan",
param_initializer=self._param_initializer,
name=scope_name+'encoder')
next_sent_feat = fluid.layers.slice(
input=enc_out, axes=[1], starts=[0], ends=[1])
next_sent_feat = fluid.layers.reshape(next_sent_feat, [-1, next_sent_feat.shape[-1]])
next_sent_feat = fluid.layers.fc(
input=next_sent_feat,
size=self._emb_size,
act="tanh",
param_attr=fluid.ParamAttr(
name=scope_name+"pooled_fc.w_0", initializer=self._param_initializer),
bias_attr=scope_name+"pooled_fc.b_0")
return {'embedding_table': embedding_table,
'word_embedding': emb_out,
'encoder_outputs': enc_out,
'sentence_embedding': next_sent_feat,
'sentence_pair_embedding': next_sent_feat}
......
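A reading of the scope_name change above (an inference, not stated in the diff): the new build() argument prefixes every created parameter name (embedding tables, encoder layers, pooled FC), presumably so that several backbone instances can coexist in one program without parameter-name collisions; passing an empty string reproduces the previous unprefixed names.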
......@@ -23,6 +23,35 @@ from functools import partial
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid.layer_helper import LayerHelper as LayerHelper
def layer_norm(x, begin_norm_axis=1, epsilon=1e-6, param_attr=None, bias_attr=None):
helper = LayerHelper('layer_norm', **locals())
mean = layers.reduce_mean(x, dim=begin_norm_axis, keep_dim=True)
shift_x = layers.elementwise_sub(x=x, y=mean, axis=0)
variance = layers.reduce_mean(layers.square(shift_x), dim=begin_norm_axis, keep_dim=True)
r_stdev = layers.rsqrt(variance + epsilon)
norm_x = layers.elementwise_mul(x=shift_x, y=r_stdev, axis=0)
param_shape = [reduce(lambda x, y: x * y, norm_x.shape[begin_norm_axis:])]
param_dtype = norm_x.dtype
scale = helper.create_parameter(
attr=param_attr,
shape=param_shape,
dtype=param_dtype,
default_initializer=fluid.initializer.Constant(1.))
bias = helper.create_parameter(
attr=bias_attr,
shape=param_shape,
dtype=param_dtype,
is_bias=True,
default_initializer=fluid.initializer.Constant(0.))
out = layers.elementwise_mul(x=norm_x, y=scale, axis=-1)
out = layers.elementwise_add(x=out, y=bias, axis=-1)
return out
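For reference, a minimal numpy sketch of what the custom layer_norm above is assumed to compute in the common case where begin_norm_axis points at the last axis, as pre_post_process_layer calls it below; gamma and beta stand in for the scale and bias parameters.
import numpy as np

def layer_norm_ref(x, gamma, beta, epsilon=1e-6):
    # Normalize over the last axis, then apply the learned scale and bias.
    mean = x.mean(axis=-1, keepdims=True)
    var = ((x - mean) ** 2).mean(axis=-1, keepdims=True)
    return (x - mean) / np.sqrt(var + epsilon) * gamma + beta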
def multi_head_attention(queries,
keys,
values,
......@@ -209,7 +238,7 @@ def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.,
out_dtype = out.dtype
if out_dtype == fluid.core.VarDesc.VarType.FP16:
out = layers.cast(x=out, dtype="float32")
out = layers.layer_norm(
out = layer_norm(
out,
begin_norm_axis=len(out.shape) - 1,
param_attr=fluid.ParamAttr(
......
......@@ -557,7 +557,7 @@ class Controller(object):
inst.task_layer['pred'] = pred_parad
pred_joint_input_names, pred_joint_shape_and_dtypes, name_to_position = merge_input_attrs(
pred_backbone.inputs_attr, inst.task_layer['pred'].inputs_attrs['reader'],
insert_taskid=False)
insert_taskid=False, insert_batchsize=False, insert_seqlen=False, insert_batchsize_x_seqlen=False)
pred_prog = inst.load(infer_model_path)
# pred_prog = fluid.CompiledProgram(pred_prog).with_data_parallel()
......@@ -664,9 +664,9 @@ class Controller(object):
"step_" + str(global_step))
fluid.io.save_persistables(self.exe, save_path, saver_program)
save_path = os.path.join(main_conf['save_path'],
"step_" + str(global_step) + "_final")
fluid.io.save_persistables(self.exe, save_path, saver_program)
# save_path = os.path.join(main_conf['save_path'],
# "step_" + str(global_step) + "_final")
# fluid.io.save_persistables(self.exe, save_path, saver_program)
def pred(self, task_instance, inference_model_dir=None):
if self._for_train:
......
......@@ -36,11 +36,13 @@ class Reader(reader):
self._batch_size = config['batch_size']
self._max_seq_len = config['max_seq_len']
self._num_classes = config['n_classes']
if phase == 'train':
self._input_file = config['train_file']
self._num_epochs = None # prevent the iterator from terminating
self._shuffle = config.get('shuffle', False)
self._shuffle_buffer = config.get('shuffle_buffer', 5000)
# self._shuffle_buffer = config.get('shuffle_buffer', 5000)
elif phase == 'eval':
self._input_file = config['dev_file']
self._num_epochs = 1
......@@ -54,7 +56,7 @@ class Reader(reader):
self._phase = phase
# self._batch_size =
self._print_first_n = config.get('print_first_n', 1)
self._print_first_n = config.get('print_first_n', 0)
@property
......@@ -91,6 +93,7 @@ class Reader(reader):
return outputs
for batch in self._data_generator():
print(batch)
yield list_to_dict(batch)
def get_epoch_outputs(self):
......
......@@ -15,6 +15,7 @@
from paddlepalm.interface import reader
from paddlepalm.reader.utils.reader4ernie import MaskLMReader
import numpy as np
class Reader(reader):
......@@ -65,7 +66,7 @@ class Reader(reader):
"input_mask": [[-1, -1, 1], 'float32'],
"task_ids": [[-1, -1, 1], 'int64'],
"mask_label": [[-1, 1], 'int64'],
"mask_pos": [[-1, 1], 'int64']
"mask_pos": [[-1, 1], 'int64'],
}
......@@ -78,9 +79,12 @@ class Reader(reader):
names = ['token_ids', 'position_ids', 'segment_ids', 'input_mask',
'task_ids', 'mask_label', 'mask_pos']
outputs = {n: i for n,i in zip(names, x)}
# outputs['batchsize_x_seqlen'] = [self._batch_size * len(outputs['token_ids'][0]) - 1]
return outputs
for batch in self._data_generator():
# print(np.shape(list_to_dict(batch)['token_ids']))
# print(list_to_dict(batch)['mask_label'].tolist())
yield list_to_dict(batch)
def get_epoch_outputs(self):
......
This diff is collapsed.
......@@ -21,15 +21,29 @@ class TaskParadigm(task_paradigm):
'''
classification
'''
def __init___(self, config, phase):
def __init__(self, config, phase, backbone_config=None):
self._is_training = phase == 'train'
self.sent_emb_size = config['hidden_size']
self._hidden_size = backbone_config['hidden_size']
self.num_classes = config['n_classes']
if 'initializer_range' in config:
self._param_initializer = config['initializer_range']
else:
self._param_initializer = fluid.initializer.TruncatedNormal(
scale=backbone_config.get('initializer_range', 0.02))
if 'dropout_prob' in config:
self._dropout_prob = config['dropout_prob']
else:
self._dropout_prob = backbone_config.get('hidden_dropout_prob', 0.0)
@property
def inputs_attrs(self):
return {'bakcbone': {"sentence_emb": [-1, self.sent_emb_size], 'float32']},
'reader': {"label_ids": [[-1, 1], 'int64']}}
if self._is_training:
reader = {"label_ids": [[-1, 1], 'int64']}
else:
reader = {}
bb = {"sentence_embedding": [[-1, self._hidden_size], 'float32']}
return {'reader': reader, 'backbone': bb}
@property
def outputs_attrs(self):
......@@ -39,22 +53,29 @@ class TaskParadigm(task_paradigm):
return {'logits': [[-1, self.num_classes], 'float32']}
def build(self, **inputs):
sent_emb = inputs['backbone']['sentence_emb']
sent_emb = inputs['backbone']['sentence_embedding']
label_ids = inputs['reader']['label_ids']
if self._is_training:
cls_feats = fluid.layers.dropout(
x=sent_emb,
dropout_prob=self._dropout_prob,
dropout_implementation="upscale_in_train")
logits = fluid.layers.fc(
input=ent_emb
input=sent_emb,
size=self.num_classes,
param_attr=fluid.ParamAttr(
name="cls_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.1)),
initializer=self._param_initializer),
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
loss = fluid.layers.softmax_with_cross_entropy(
logits=logits, label=label_ids)
loss = layers.mean(loss)
if self._is_training:
loss = fluid.layers.softmax_with_cross_entropy(
logits=logits, label=label_ids)
loss = layers.mean(loss)
return {"loss": loss}
else:
return {"logits":logits}
......@@ -24,6 +24,17 @@ class TaskParadigm(task_paradigm):
def __init__(self, config, phase, backbone_config=None):
self._is_training = phase == 'train'
self._hidden_size = backbone_config['hidden_size']
if 'initializer_range' in config:
self._param_initializer = config['initializer_range']
else:
self._param_initializer = fluid.initializer.TruncatedNormal(
scale=backbone_config.get('initializer_range', 0.02))
if 'dropout_prob' in config:
self._dropout_prob = config['dropout_prob']
else:
self._dropout_prob = backbone_config.get('hidden_dropout_prob', 0.0)
@property
def inputs_attrs(self):
......@@ -46,16 +57,18 @@ class TaskParadigm(task_paradigm):
labels = inputs["reader"]["label_ids"]
cls_feats = inputs["backbone"]["sentence_pair_embedding"]
cls_feats = fluid.layers.dropout(
x=cls_feats,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
if self._is_training:
cls_feats = fluid.layers.dropout(
x=cls_feats,
dropout_prob=self._dropout_prob,
dropout_implementation="upscale_in_train")
logits = fluid.layers.fc(
input=cls_feats,
size=2,
param_attr=fluid.ParamAttr(
name="cls_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
initializer=self._param_initializer),
bias_attr=fluid.ParamAttr(
name="cls_out_b",
initializer=fluid.initializer.Constant(0.)))
......
......@@ -34,9 +34,11 @@ class TaskParadigm(task_paradigm):
def inputs_attrs(self):
reader = {
"mask_label": [[-1, 1], 'int64'],
"batchsize_x_seqlen": [[1], 'int64'],
"mask_pos": [[-1, 1], 'int64']}
if not self._is_training:
del reader['mask_label']
del reader['batchsize_x_seqlen']
bb = {
"encoder_outputs": [[-1, -1, self._hidden_size], 'float32'],
"embedding_table": [[-1, self._vocab_size, self._emb_size], 'float32']}
......@@ -52,6 +54,8 @@ class TaskParadigm(task_paradigm):
def build(self, inputs):
if self._is_training:
mask_label = inputs["reader"]["mask_label"]
# only needed for multi-task learning: when a step runs another task, its seqlen can be smaller and the gather would go out of range
batchsize_x_seqlen = inputs["reader"]["batchsize_x_seqlen"]
mask_pos = inputs["reader"]["mask_pos"]
word_emb = inputs["backbone"]["embedding_table"]
enc_out = inputs["backbone"]["encoder_outputs"]
......@@ -61,7 +65,12 @@ class TaskParadigm(task_paradigm):
_param_initializer = fluid.initializer.TruncatedNormal(
scale=self._initializer_range)
mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')
if self._is_training:
# only needed for multi-task training: when a step runs another task, its seqlen can be smaller and the gather would go out of range
#mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')
mask_pos = fluid.layers.elementwise_min(mask_pos, batchsize_x_seqlen)
#print(fluid.default_main_program().blocks[0].vars)
reshaped_emb_out = fluid.layers.reshape(
x=enc_out, shape=[-1, emb_size])
......
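A minimal numpy sketch of why mask_pos is clamped before the positions are used to gather from the flattened encoder outputs (shapes and values here are illustrative only):
import numpy as np

batch_size, seq_len, hidden = 4, 8, 16                      # example shapes
enc_out = np.random.rand(batch_size, seq_len, hidden)
flat = enc_out.reshape(-1, hidden)                          # [batch_size * seq_len, hidden]
mask_pos = np.array([3, 17, 31, 40])                        # 40 exceeds 4 * 8 - 1 = 31
mask_pos = np.minimum(mask_pos, batch_size * seq_len - 1)   # same role as elementwise_min above
masked_feats = flat[mask_pos]                               # gather now stays in range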
......@@ -143,6 +143,7 @@ def create_joint_iterator_fn(iterators, iterator_prefixes, joint_shape_and_dtype
def iterator():
v = verbose
has_show_warn = False
while True:
id = np.random.choice(task_ids, p=weights)
results = fake_batch
......@@ -150,16 +151,37 @@ def create_joint_iterator_fn(iterators, iterator_prefixes, joint_shape_and_dtype
print('----- debug joint iterator -----')
print('sampled task id: '+str(id))
task_id_tensor = np.array([[id]]).astype("int64")
results[0] = task_id_tensor
# results[0] = task_id_tensor
for i in range(dev_count):
results[0] = task_id_tensor
# these two should be equivalent
# results[0] = task_id_tensor
results[outname_to_pos['__task_id']] = task_id_tensor
assert outname_to_pos['__task_id'] == 0
if id in outbuf:
outputs = outbuf[id]
del outbuf[id]
else:
outputs = next(iterators[id]) # dict type
# if 'token_ids' in outputs:
# val1 = len(outputs['token_ids'])
# val = _check_and_adapt_shape_dtype([val1], [[1], 'int64'])
# results[outname_to_pos['batch_size']] = val
# val2 = len(outputs['token_ids'][0])
# val = _check_and_adapt_shape_dtype([val2], [[1], 'int64'])
# results[outname_to_pos['seqlen']] = val
# val = _check_and_adapt_shape_dtype([val1*val2], [[1], 'int64'])
# results[outname_to_pos['batchsize_x_seqlen']] = val
# else:
# if not has_show_warn:
# print('WARNING: token_ids not found in current batch, failed to yield batch_size, seqlen and batchsize_x_seqlen. (This message would be shown only once.)')
# has_show_warn = True
prefix = iterator_prefixes[id]
for outname, val in outputs.items():
if v > 0:
......@@ -192,7 +214,7 @@ def create_joint_iterator_fn(iterators, iterator_prefixes, joint_shape_and_dtype
return iterator
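For context, a minimal sketch of the weighted task sampling this iterator performs each step; the normalization of mix_ratio into probabilities is an assumption based on the demo configs above, and the values are illustrative:
import numpy as np

mix_ratio = [0.5, 0.5]                              # e.g. the "mrqa, match4mrqa" setup in config_demo2
weights = [r / sum(mix_ratio) for r in mix_ratio]   # -> [0.5, 0.5]
task_ids = list(range(len(weights)))
sampled_id = np.random.choice(task_ids, p=weights)  # one task id drawn per joint training step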
def merge_input_attrs(backbone_attr, task_attrs, insert_taskid=True):
def merge_input_attrs(backbone_attr, task_attrs, insert_taskid=True, insert_batchsize=False, insert_seqlen=False, insert_batchsize_x_seqlen=False):
"""
Args:
task_attrs(list[dict]|dict): task input attributes, key=attr_name, val=[shape, dtype], support single task and nested tasks
......@@ -200,14 +222,28 @@ def merge_input_attrs(backbone_attr, task_attrs, insert_taskid=True):
if isinstance(task_attrs, dict):
task_attrs = [task_attrs]
ret = []
names = []
start = 0
if insert_taskid:
ret = [([1,1], 'int64')]
names = ['__task_id']
start = 1
else:
ret = []
names = []
start = 0
ret.append(([1,1], 'int64'))
names.append('__task_id')
start += 1
if insert_batchsize:
ret.append(([1], 'int64'))
names.append('batch_size')
start += 1
if insert_seqlen:
ret.append(([1], 'int64'))
names.append('seqlen')
start += 1
if insert_batchsize_x_seqlen:
ret.append(([1], 'int64'))
names.append('batchsize_x_seqlen')
start += 1
names += sorted(backbone_attr.keys())
ret.extend([backbone_attr[k] for k in names[start:]])
......
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export CUDA_VISIBLE_DEVICES=0
python demo1.py
export CUDA_VISIBLE_DEVICES=0
python demo2.py
python -u demo2.py
# GLOG_vmodule=lookup_table_op=4 python -u demo2.py > debug2.log 2>&1
export CUDA_VISIBLE_DEVICES=0
python demo3.py