diff --git a/paddleslim/nas/darts/search_space/conv_bert/cls.py b/paddleslim/nas/darts/search_space/conv_bert/cls.py
index 7ec012b649d19e093bbadda9c02c7b43732dedb0..d2452efb338dc046e16037255507fcdccff9deae 100644
--- a/paddleslim/nas/darts/search_space/conv_bert/cls.py
+++ b/paddleslim/nas/darts/search_space/conv_bert/cls.py
@@ -91,19 +91,6 @@ class AdaBERTClassifier(Layer):
             use_fixed_gumbel=self.use_fixed_gumbel,
             gumbel_alphas=gumbel_alphas)
 
-        for s_emb, t_emb in zip(self.student.emb_names(),
-                                self.teacher.emb_names()):
-            t_emb.stop_gradient = True
-            if fix_emb:
-                s_emb.stop_gradient = True
-            print(
-                "Assigning embedding[{}] from teacher to embedding[{}] in student.".
-                format(t_emb.name, s_emb.name))
-            fluid.layers.assign(input=t_emb, output=s_emb)
-            print(
-                "Assigned embedding[{}] from teacher to embedding[{}] in student.".
-                format(t_emb.name, s_emb.name))
-        fix_emb = False
         for s_emb, t_emb in zip(self.student.emb_names(),
                                 self.teacher.emb_names()):
@@ -173,4 +160,3 @@ class AdaBERTClassifier(Layer):
 
         total_loss = (1 - self._gamma) * ce_loss + self._gamma * kd_loss
         return total_loss, accuracy, ce_loss, kd_loss, s_logits
-
diff --git a/paddleslim/nas/darts/search_space/conv_bert/model/transformer_encoder.py b/paddleslim/nas/darts/search_space/conv_bert/model/transformer_encoder.py
index 3d8b6a3fc7c934706429222715e016805ad3274a..83a9b21d9ae3215983ef768206ddbebc4bfaa618 100755
--- a/paddleslim/nas/darts/search_space/conv_bert/model/transformer_encoder.py
+++ b/paddleslim/nas/darts/search_space/conv_bert/model/transformer_encoder.py
@@ -262,7 +262,6 @@ class EncoderLayer(Layer):
             default_initializer=NormalInitializer(
                 loc=0.0, scale=1e-3))
 
-
         self.pool2d_avg = Pool2D(pool_type='avg', global_pooling=True)
         self.bns = []
         self.outs = []
@@ -305,21 +304,6 @@ class EncoderLayer(Layer):
 
     def forward(self, enc_input_0, enc_input_1, epoch, flops=[], model_size=[]):
-=======
-        self.outs.append(out)
-
-        self.use_fixed_gumbel = use_fixed_gumbel
-        self.gumbel_alphas = gumbel_softmax(self.alphas)
-        if gumbel_alphas is not None:
-            self.gumbel_alphas = np.array(gumbel_alphas).reshape(
-                self.alphas.shape)
-        else:
-            self.gumbel_alphas = gumbel_softmax(self.alphas)
-        self.gumbel_alphas.stop_gradient = True
-
-        print("gumbel_alphas: {}".format(self.gumbel_alphas))
-
-    def forward(self, enc_input_0, enc_input_1, flops=[], model_size=[]):
         alphas = self.gumbel_alphas if self.use_fixed_gumbel else gumbel_softmax(
             self.alphas, epoch)
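Note: the copy of the embedding-initialization loop that survives in cls.py performs the same teacher-to-student weight copy as the duplicate deleted above. A minimal standalone sketch of that pattern follows; the copy_teacher_embeddings helper name is illustrative only, while emb_names() returning embedding parameters in matching order and the fluid.layers.assign call are taken from the patch itself.

# Hypothetical standalone sketch of the retained embedding-copy logic.
import paddle.fluid as fluid

def copy_teacher_embeddings(student, teacher, fix_emb=False):
    # student.emb_names() / teacher.emb_names() are assumed to yield the
    # embedding parameters of each model in matching order.
    for s_emb, t_emb in zip(student.emb_names(), teacher.emb_names()):
        t_emb.stop_gradient = True      # teacher embeddings are never trained
        if fix_emb:
            s_emb.stop_gradient = True  # optionally freeze the student copy as well
        # Overwrite the student embedding weights with the teacher's values.
        fluid.layers.assign(input=t_emb, output=s_emb)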