#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TODO(luotao02): This config is only used for unit tests. It is out of date
# and will be updated later.

# ---------------------------------------------------------------------------
# Data sources: file lists for the training and test sets.
# ---------------------------------------------------------------------------
TrainData(ProtoData(
    files="trainer/tests/train_files.txt",
    usage_ratio=1.0,
))

TestData(ProtoData(
    files="trainer/tests/test_files.txt",
))

# ---------------------------------------------------------------------------
# Global defaults applied before any layer is declared.
# ---------------------------------------------------------------------------
default_initial_std(1)
default_decay_rate(4e-4)
default_device(0)

# ---------------------------------------------------------------------------
# Network interface: four data inputs, one output (the "crf" layer).
# ---------------------------------------------------------------------------
Inputs("features", "word", "pos", "chunk")
Outputs("crf")

# Data layers, one per name listed in Inputs(...).
Layer(
    name="features",
    type="data",
    size=4339,
)

Layer(
    name="word",
    type="data",
    size=478,
)

Layer(
    name="pos",
    type="data",
    size=45,
)

Layer(
    name="chunk",
    type="data",
    size=23,
)

# Bias-free mixed layer projecting "features" down to 23 values.
# Only the full-matrix projection is active; the two table projections are
# left commented out, as in the original config.
# NOTE(review): device = -1 overrides default_device(0) for this layer;
# presumably it selects the CPU -- confirm against the trainer.
Layer(
    name="output",
    type="mixed",
    size=23,
    bias=False,
    device=-1,
    inputs=[
        FullMatrixProjection("features", parameter_name="feature_weights"),
        # TableProjection("word"),
        # TableProjection("pos"),
    ],
)

# CRF layer over "output" and "chunk". It uses the parameter named "crfw",
# the same parameter name that the "crf_decoding" layer below refers to.
Layer(
    name="crf",
    type="crf",
    size=23,
    device=-1,
    inputs=[
        Input("output", parameter_name="crfw"),
        "chunk",
    ],
)

# CRF decoding layer; takes the same inputs and "crfw" parameter name as "crf".
Layer(
    name="crf_decoding",
    type="crf_decoding",
    size=23,
    device=-1,
    inputs=[
        Input("output", parameter_name="crfw"),
        "chunk",
    ],
)

# "sum" evaluator fed by the output of the decoding layer.
Evaluator(
    name="error",
    type="sum",
    inputs="crf_decoding",
)

# The bare string below is a disabled chunk-F1 evaluator. It is kept as a
# string expression (not executed as config) exactly as in the original.
'''
# chuck evaluator cannot be used for GPU training
Evaluator(
    name = "chunk_f1",
    type = "chunk",
    inputs = ["crf_decoding", "chunk"],
    chunk_scheme = "IOB",
    num_chunk_types = 11,
)
'''

# ---------------------------------------------------------------------------
# Optimizer / training settings.
# NOTE(review): l1weight, c1, backoff, owlqn_steps and max_backoff look like
# OWL-QN options -- confirm whether they have any effect with 'sgd'.
# ---------------------------------------------------------------------------
Settings(
    algorithm="sgd",
    batch_size=100,
    average_window=0.5,
    max_average_window=2500,
    learning_rate=1e-1,
    learning_rate_decay_a=5e-7,
    learning_rate_decay_b=0.75,
    l1weight=0,
    l2weight=1,
    c1=0.0001,
    backoff=0.5,
    owlqn_steps=100,
    max_backoff=5,
)