# dense_utils.py
import numpy as np
import pandas as pd

from ..wdtypes import *


# Turn off pandas' chained-assignment (SettingWithCopy) check. This is a
# process-wide pandas option, so it also applies to any code that imports
# this module, not only to the helpers defined here.
pd.options.mode.chained_assignment = None


def label_encoder(df_inp: pd.DataFrame, cols: Optional[List[str]] = None,
                  val_to_idx: Optional[Dict[str, Dict[str, int]]] = None):
    """Replace the values of categorical columns with integer codes.

    The input frame is never modified; the encoding is applied to a copy.

    Parameters
    ----------
    df_inp:
        DataFrame holding the columns to encode.
    cols:
        Names of the columns to encode. When ``None``, every column of
        ``object`` dtype is used. Only consulted when a new mapping has to
        be built (i.e. ``val_to_idx`` is ``None`` or empty).
    val_to_idx:
        Existing ``{column: {value: code}}`` mapping to reuse, e.g. one
        returned by an earlier call. When given (and non-empty), the columns
        to encode are the keys of this mapping and ``cols`` is ignored.

    Returns
    -------
    tuple
        ``(df, val_to_idx)``: the encoded copy of ``df_inp`` and the mapping
        that was applied. Codes are assigned per column in order of first
        appearance, starting at 0.

    Raises
    ------
    KeyError
        If a column contains a value that is absent from its mapping (only
        possible when ``val_to_idx`` is supplied by the caller) or if a
        mapped column does not exist in ``df_inp``.
    """
    df = df_inp.copy()

    # Identity check: `cols == None` is evaluated element-wise for a pandas
    # Index / numpy array and makes the `if` raise on more than one element.
    if cols is None:
        cols = list(df.select_dtypes(include=['object']).columns)

    if not val_to_idx:
        # Build the mapping from the data: one code per distinct value.
        val_to_idx = {
            col: {value: code for code, value in enumerate(df[col].unique())}
            for col in cols
        }

    for col, mapping in val_to_idx.items():
        # Plain dict indexing (not `.get`) so unseen values fail loudly.
        df[col] = df[col].apply(lambda x, mapping=mapping: mapping[x])

    return df, val_to_idx