utils.py 672 字节
Newer Older
S
Superjom 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
import logging

# id substituted for a word that is not found in the vocabulary (used by
# sent2ids as the fallback of the dict lookup).
UNK = 0

# module-level logger registered under the literal name "logger"; its level is
# set to INFO.
logger = logging.getLogger("logger")
logger.setLevel(logging.INFO)


class TaskType:
    '''
    type of DSSM's task.

    The members are plain integer constants, read as class attributes
    (e.g. TaskType.RANK); the class is not meant to be instantiated.
    '''
    # pairwise rank.
    RANK = 0
    # classification.
    # NOTE: the name is misspelled, but it is kept as-is because existing
    # callers may reference it.
    CLASSFICATION = 1
    # correctly spelled alias with the same value as CLASSFICATION.
    CLASSIFICATION = CLASSFICATION


def sent2ids(sent, vocab):
    '''
    map every whitespace-separated word of a sentence to its id.

    @sent: str
        a sentence.
    @vocab: dict
        a word dic, looked up with each word as the key.
    @return: list
        one id per word, in sentence order; a word that is missing from
        vocab is represented by UNK.
    '''
    ids = []
    for word in sent.split():
        # fall back to the unknown-word id when the lookup misses.
        ids.append(vocab.get(word, UNK))
    return ids


def load_dic(path):
    '''
    load a word dic from a text file.

    word dic format:
      each line is a word

    @path: str
        path of the dic file.
    @return: dict
        maps each line (with surrounding whitespace stripped) to its
        0-based line index; if a word occurs on several lines, the index
        of the last occurrence is kept.

    NOTE(review): ids start at 0, which is also the value of UNK, so the
    first line of the file shares its id with unknown words -- presumably
    that line is meant to be the unknown-word token; confirm.
    '''
    with open(path) as dic_file:
        return {
            text.strip(): line_no
            for line_no, text in enumerate(dic_file)
        }