Commit 7ec3cd55 authored by: J jrzaurin

Modified documentation and added doc test. Also changed some of the code...

Modified documentation and added doc test. Also changed some of the code according to the increase of test coverage
Parent commit: bbc66f04
......@@ -79,12 +79,7 @@ class WidePreprocessor(BasePreprocessor):
[2, 5],
[3, 6]])
>>> wide_preprocessor.feature_dict
{'color_r': 1,
'color_b': 2,
'color_g': 3,
'color_size_r-s': 4,
'color_size_b-n': 5,
'color_size_g-l': 6}
{'color_r': 1, 'color_b': 2, 'color_g': 3, 'color_size_r-s': 4, 'color_size_b-n': 5, 'color_size_g-l': 6}
>>> wide_preprocessor.inverse_transform(X_wide)
color color_size
0 r r-s
......@@ -449,14 +444,18 @@ class ImagePreprocessor(BasePreprocessor):
Examples
--------
>>> import pandas as pd
>>>
>>> from pytorch_widedeep.preprocessing import ImagePreprocessor
>>> df_train = pd.DataFrame({'images_column': ['galaxy1.png', 'galaxy2.png']})
>>> df_test = pd.DataFrame({'images_column': ['galaxy3.png']})
>>>
>>> path_to_image1 = 'tests/test_data_utils/images/galaxy1.png'
>>> path_to_image2 = 'tests/test_data_utils/images/galaxy2.png'
>>>
>>> df_train = pd.DataFrame({'images_column': [path_to_image1]})
>>> df_test = pd.DataFrame({'images_column': [path_to_image2]})
>>> img_preprocessor = ImagePreprocessor(img_col='images_column', img_path='.', verbose=0)
>>> resized_images = img_preprocessor.fit_transform(df_train)
>>> new_resized_images = img_preprocessor.transform(df_train)
.. note:: Normalising metrics will only be computed when the
``fit_transform`` method is run. Running ``transform`` only will not
change the computed metrics and running ``fit`` only simply
......
......@@ -293,8 +293,7 @@ class Tokenizer:
>>> texts = ['Machine learning is great', 'but building stuff is even better']
>>> tok = Tokenizer()
>>> tok.process_all(texts)
[['xxmaj', 'machine', 'learning', 'is', 'great'],
['but', 'building', 'stuff', 'is', 'even', 'better']]
[['xxmaj', 'machine', 'learning', 'is', 'great'], ['but', 'building', 'stuff', 'is', 'even', 'better']]
.. note:: Note the token ``TK_MAJ`` (`xxmaj`), used to indicate the
next word begins with a capital in the original text. For more
......@@ -369,9 +368,6 @@ class Vocab:
>>> texts = ['Machine learning is great', 'but building stuff is even better']
>>> tokens = Tokenizer().process_all(texts)
>>> vocab = Vocab.create(tokens, max_vocab=18, min_freq=1)
>>> print(vocab.itos)
['xxunk', 'xxpad', 'xxbos', 'xxeos', 'xxfld', 'xxmaj', 'xxup', 'xxrep', 'xxwrep',
'is', 'machine', 'learning', 'great', 'but', 'building', 'stuff', 'even', 'better']
>>> vocab.numericalize(['machine', 'learning', 'is', 'great'])
[10, 11, 9, 12]
>>> vocab.textify([10, 11, 9, 12])
......
......@@ -45,7 +45,7 @@ class AspectAwarePreprocessor:
--------
>>> import cv2
>>> from pytorch_widedeep.utils import AspectAwarePreprocessor
>>> img = cv2.imread("galaxy.png")
>>> img = cv2.imread("tests/test_data_utils/images/galaxy1.png")
>>> img.shape
(694, 890, 3)
>>> app = AspectAwarePreprocessor(width=224, height=224)
......
......@@ -123,7 +123,7 @@ def pad_sequences(
def build_embeddings_matrix(
vocab: Vocab, word_vectors_path: str, min_freq: int, verbose: int = 1
) -> np.ndarray:
) -> np.ndarray: # pragma: no cover
r"""
Build the embedding matrix using pretrained word vectors
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register to comment