Unverified commit 4e562503, authored by saxon_zh, committed by GitHub

Add documentation structure for the high-level API related APIs (#2590)

* add paddle.vision.transforms.* api cn doc

* add alias for paddle.vision.transforms

* delete invalid file

* add some empty doc file for rd

* delete some text alias
Parent 926f4645
@@ -506,29 +506,70 @@ paddle.fluid.layers.center_loss paddle.nn.functional.center_loss,paddle.nn.funct
paddle.nn.functional.input.one_hot paddle.nn.functional.one_hot
paddle.tensor.creation.full paddle.full,paddle.tensor.full
paddle.fluid.layers.soft_relu paddle.nn.functional.soft_relu,paddle.nn.functional.activation.soft_relu
paddle.hapi.model.Model paddle.Model
paddle.hapi.callbacks.Callback paddle.callbacks.Callback
paddle.hapi.callbacks.ProgBarLogger paddle.callbacks.ProgBarLogger
paddle.hapi.callbacks.ModelCheckpoint paddle.callbacks.ModelCheckpoint
paddle.hapi.model_summary.summary paddle.summary
paddle.vision.models.resnet.ResNet paddle.vision.models.ResNet,paddle.vision.ResNet
paddle.vision.models.resnet.resnet18 paddle.vision.models.resnet18,paddle.vision.resnet18
paddle.vision.models.resnet.resnet34 paddle.vision.models.resnet34,paddle.vision.resnet34
paddle.vision.models.resnet.resnet50 paddle.vision.models.resnet50,paddle.vision.resnet50
paddle.vision.models.resnet.resnet101 paddle.vision.models.resnet101,paddle.vision.resnet101
paddle.vision.models.resnet.resnet152 paddle.vision.models.resnet152,paddle.vision.resnet152
paddle.vision.models.vgg.VGG paddle.vision.models.VGG,paddle.vision.VGG
paddle.vision.models.vgg.vgg11 paddle.vision.models.vgg11,paddle.vision.vgg11
paddle.vision.models.vgg.vgg13 paddle.vision.models.vgg13,paddle.vision.vgg13
paddle.vision.models.vgg.vgg16 paddle.vision.models.vgg16,paddle.vision.vgg16
paddle.vision.models.vgg.vgg19 paddle.vision.models.vgg19,paddle.vision.vgg19
paddle.vision.models.mobilenetv1.MobileNetV1 paddle.vision.models.MobileNetV1,paddle.vision.MobileNetV1
paddle.vision.models.mobilenetv1.mobilenet_v1 paddle.vision.models.mobilenet_v1,paddle.vision.mobilenet_v1
paddle.vision.models.mobilenetv2.MobileNetV2 paddle.vision.models.MobileNetV2,paddle.vision.MobileNetV2
paddle.vision.models.mobilenetv2.mobilenet_v2 paddle.vision.models.mobilenet_v2,paddle.vision.mobilenet_v2
paddle.vision.models.lenet.LeNet paddle.vision.models.LeNet,paddle.vision.LeNet
paddle.vision.transforms.transforms.Compose paddle.vision.transforms.Compose,paddle.vision.Compose
paddle.vision.transforms.transforms.BatchCompose paddle.vision.transforms.BatchCompose,paddle.vision.BatchCompose
paddle.vision.transforms.transforms.Resize paddle.vision.transforms.Resize,paddle.vision.Resize
paddle.vision.transforms.transforms.RandomResizedCrop paddle.vision.transforms.RandomResizedCrop,paddle.vision.RandomResizedCrop
paddle.vision.transforms.transforms.CenterCropResize paddle.vision.transforms.CenterCropResize,paddle.vision.CenterCropResize
paddle.vision.transforms.transforms.CenterCrop paddle.vision.transforms.CenterCrop,paddle.vision.CenterCrop
paddle.vision.transforms.transforms.RandomHorizontalFlip paddle.vision.transforms.RandomHorizontalFlip,paddle.vision.RandomHorizontalFlip
paddle.vision.transforms.transforms.RandomVerticalFlip paddle.vision.transforms.RandomVerticalFlip,paddle.vision.RandomVerticalFlip
paddle.vision.transforms.transforms.Permute paddle.vision.transforms.Permute,paddle.vision.Permute
paddle.vision.transforms.transforms.Normalize paddle.vision.transforms.Normalize,paddle.vision.Normalize
paddle.vision.transforms.transforms.GaussianNoise paddle.vision.transforms.GaussianNoise,paddle.vision.GaussianNoise
paddle.vision.transforms.transforms.BrightnessTransform paddle.vision.transforms.BrightnessTransform,paddle.vision.BrightnessTransform
paddle.vision.transforms.transforms.SaturationTransform paddle.vision.transforms.SaturationTransform,paddle.vision.SaturationTransform
paddle.vision.transforms.transforms.ContrastTransform paddle.vision.transforms.ContrastTransform,paddle.vision.ContrastTransform
paddle.vision.transforms.transforms.HueTransform paddle.vision.transforms.HueTransform,paddle.vision.HueTransform
paddle.vision.transforms.transforms.ColorJitter paddle.vision.transforms.ColorJitter,paddle.vision.ColorJitter
paddle.vision.transforms.transforms.RandomCrop paddle.vision.transforms.RandomCrop,paddle.vision.RandomCrop
paddle.vision.transforms.transforms.RandomErasing paddle.vision.transforms.RandomErasing,paddle.vision.RandomErasing
paddle.vision.transforms.transforms.Pad paddle.vision.transforms.Pad,paddle.vision.Pad
paddle.vision.transforms.transforms.RandomRotate paddle.vision.transforms.RandomRotate,paddle.vision.RandomRotate
paddle.vision.transforms.transforms.Grayscale paddle.vision.transforms.Grayscale,paddle.vision.Grayscale
paddle.vision.transforms.functional.flip paddle.vision.transforms.flip,paddle.vision.flip
paddle.vision.transforms.functional.resize paddle.vision.transforms.resize,paddle.vision.resize
paddle.vision.transforms.functional.pad paddle.vision.transforms.pad,paddle.vision.pad
paddle.vision.transforms.functional.rotate paddle.vision.transforms.rotate,paddle.vision.rotate
paddle.vision.transforms.functional.to_grayscale paddle.vision.transforms.to_grayscale,paddle.vision.to_grayscale
paddle.vision.datasets.folder.DatasetFolder paddle.vision.datasets.DatasetFolder,paddle.vision.DatasetFolder
paddle.vision.datasets.folder.ImageFolder paddle.vision.datasets.ImageFolder,paddle.vision.ImageFolder
paddle.vision.datasets.mnist.MNIST paddle.vision.datasets.MNIST,paddle.vision.MNIST
paddle.vision.datasets.flowers.Flowers paddle.vision.datasets.Flowers,paddle.vision.Flowers
paddle.vision.datasets.cifar.Cifar10 paddle.vision.datasets.Cifar10,paddle.vision.Cifar10
paddle.vision.datasets.cifar.Cifar100 paddle.vision.datasets.Cifar100,paddle.vision.Cifar100
paddle.vision.datasets.voc2012.VOC2012 paddle.vision.datasets.VOC2012,paddle.vision.VOC2012
paddle.text.datasets.conll05.Conll05st paddle.text.datasets.Conll05st,paddle.text.Conll05st
paddle.text.datasets.imdb.Imdb paddle.text.datasets.Imdb,paddle.text.Imdb
paddle.text.datasets.imikolov.Imikolov paddle.text.datasets.Imikolov,paddle.text.Imikolov
paddle.text.datasets.movielens.Movielens paddle.text.datasets.Movielens,paddle.text.Movielens
paddle.text.datasets.movie_reviews.MovieReviews paddle.text.datasets.MovieReviews,paddle.text.MovieReviews
paddle.text.datasets.uci_housing.UCIHousing paddle.text.datasets.UCIHousing,paddle.text.UCIHousing
paddle.text.datasets.wmt14.WMT14 paddle.text.datasets.WMT14,paddle.text.WMT14
paddle.text.datasets.wmt16.WMT16 paddle.text.datasets.WMT16,paddle.text.WMT16
paddle.metric.metrics.Metric paddle.metric.Metric
paddle.metric.metrics.Accuracy paddle.metric.Accuracy
paddle.metric.metrics.Precision paddle.metric.Precision
paddle.metric.metrics.Recall paddle.metric.Recall
paddle.metric.metrics.Auc paddle.metric.Auc
.. _cn_api_io_Dataset:
Dataset
-------------------------------
.. py:class:: paddle.io.Dataset()
To be added.
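Until the reference content above is filled in, the following minimal sketch shows the usual way to define a map-style dataset by subclassing paddle.io.Dataset and implementing ``__getitem__`` and ``__len__``; the class name ``RandomDataset`` and its random data are illustrative assumptions, not part of the official documentation.

.. code-block:: python

    import numpy as np
    import paddle
    # Hypothetical example: a map-style dataset must implement
    # __getitem__ and __len__.
    class RandomDataset(paddle.io.Dataset):
        def __init__(self, num_samples):
            self.num_samples = num_samples
        def __getitem__(self, idx):
            image = np.random.random([784]).astype('float32')
            label = np.random.randint(0, 9, (1,)).astype('int64')
            return image, label
        def __len__(self):
            return self.num_samples
    dataset = RandomDataset(10)
    print(len(dataset), dataset[0][0].shape)
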
.. _cn_api_io_IterableDataset:
IterableDataset
-------------------------------
.. py:class:: paddle.io.IterableDataset()
To be added.
Parameters
:::::::::
Code example
:::::::::
.. code-block:: python
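    # A minimal sketch (an illustrative assumption, not official reference
    # content): an iterable-style dataset subclasses paddle.io.IterableDataset
    # and only needs to implement __iter__.
    import numpy as np
    import paddle
    class RandomIterableDataset(paddle.io.IterableDataset):
        def __init__(self, num_samples):
            self.num_samples = num_samples
        def __iter__(self):
            for _ in range(self.num_samples):
                image = np.random.random([784]).astype('float32')
                label = np.random.randint(0, 9, (1,)).astype('int64')
                yield image, label
    for image, label in RandomIterableDataset(10):
        print(image.shape, label)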
\ No newline at end of file
.. _cn_api_fluid_metrics_Precision:
Precision
-------------------------------
.. py:class:: paddle.fluid.metrics.Precision(name=None)
Precision (also called positive predictive value) is the fraction of instances predicted as positive that are actually positive. See https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers. This class manages the precision score for binary classification tasks.
**Code example**
.. code-block:: python
import paddle.fluid as fluid
import numpy as np
metric = fluid.metrics.Precision()
# generate predictions and labels
preds = [[0.1], [0.7], [0.8], [0.9], [0.2],
[0.2], [0.3], [0.5], [0.8], [0.6]]
labels = [[0], [1], [1], [1], [1],
[0], [0], [0], [0], [0]]
preds = np.array(preds)
labels = np.array(labels)
metric.update(preds=preds, labels=labels)
precision = metric.eval()
print("expected precision: %.2f and got %.2f" % ( 3.0 / 5.0, precision))
.. py:method:: update(preds, labels)
Update the precision computation with the predictions of the current mini-batch.
Parameters:
- **preds** (numpy.array) - Predictions of the current mini-batch, the output of a binary-classification sigmoid, with shape [batch_size, 1] and dtype 'float64' or 'float32'.
- **labels** (numpy.array) - Ground-truth labels of the current mini-batch, with the same shape as preds, i.e. [batch_size, 1], and dtype 'int32' or 'int64'.
Returns: None
.. py:method:: eval()
Compute the final precision.
Parameters: None
Returns: the computed precision, a scalar of type float.
Return type: float
.. _cn_api_fluid_metrics_Recall:
Recall
-------------------------------
.. py:class:: paddle.fluid.metrics.Recall(name=None)
Recall (also known as sensitivity) is the fraction of retrieved relevant instances over the total number of relevant instances. See https://en.wikipedia.org/wiki/Precision_and_recall. This class manages the recall score for binary classification tasks.
**Code example**
.. code-block:: python
import paddle.fluid as fluid
import numpy as np
metric = fluid.metrics.Recall()
# generate predictions and labels
preds = [[0.1], [0.7], [0.8], [0.9], [0.2],
[0.2], [0.3], [0.5], [0.8], [0.6]]
labels = [[0], [1], [1], [1], [1],
[0], [0], [0], [0], [0]]
preds = np.array(preds)
labels = np.array(labels)
metric.update(preds=preds, labels=labels)
recall = metric.eval()
print("expected recall: %.2f and got %.2f" % ( 3.0 / 4.0, recall))
.. py:method:: update(preds, labels)
Update the recall computation with the predictions of the current mini-batch.
Parameters:
- **preds** (numpy.array) - Predictions of the current mini-batch, the output of a binary-classification sigmoid, with shape [batch_size, 1] and dtype 'float64' or 'float32'.
- **labels** (numpy.array) - Ground-truth labels of the current mini-batch, with the same shape as preds, i.e. [batch_size, 1], and dtype 'int32' or 'int64'.
Returns: None
.. py:method:: eval()
Compute the final recall.
Parameters: None
Returns: the computed recall, a scalar of type float.
Return type: float
.. _cn_api_metric_Accuracy:
Accuracy
-------------------------------
.. py:class:: paddle.metric.Accuracy()
Encapsulates accuracy metric logic.
Parameters
:::::::::
topk (int|tuple(int)): Number of top elements to look at
for computing accuracy. Default is (1,).
name (str, optional): String name of the metric instance. Default
is `acc`.
Standalone example:
.. code-block:: python
import numpy as np
import paddle
paddle.disable_static()
x = paddle.to_tensor(np.array([
[0.1, 0.2, 0.3, 0.4],
[0.1, 0.4, 0.3, 0.2],
[0.1, 0.2, 0.4, 0.3],
[0.1, 0.2, 0.3, 0.4]]))
y = paddle.to_tensor(np.array([[0], [1], [2], [3]]))
m = paddle.metric.Accuracy()
correct = m.compute(x, y)
m.update(correct)
res = m.accumulate()
print(res) # 0.75
Example with Model API:
.. code-block:: python
import paddle
paddle.disable_static()
train_dataset = paddle.vision.datasets.MNIST(mode='train')
model = paddle.Model(paddle.vision.LeNet(classifier_activation=None))
optim = paddle.optimizer.Adam(
learning_rate=0.001, parameters=model.parameters())
model.prepare(
optim,
loss=paddle.nn.CrossEntropyLoss(),
metrics=paddle.metric.Accuracy())
model.fit(train_dataset, batch_size=64)
\ No newline at end of file
.. _cn_api_metric_Auc:
Auc
-------------------------------
.. py:class:: paddle.metric.Auc()
The auc metric is for binary classification.
Refer to https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve.
Please notice that the auc metric is implemented with python, which may be a little bit slow.
The `auc` function creates four local variables, `true_positives`,
`true_negatives`, `false_positives` and `false_negatives` that are used to
compute the AUC. To discretize the AUC curve, a linearly spaced set of
thresholds is used to compute pairs of recall and precision values. The area
under the ROC-curve is therefore computed using the height of the recall
values by the false positive rate, while the area under the PR-curve is
computed using the height of the precision values by the recall.
Parameters
:::::::::
curve (str): Specifies the mode of the curve to be computed,
'ROC' or 'PR' for the Precision-Recall curve. Default is 'ROC'.
num_thresholds (int): The number of thresholds to use when
discretizing the ROC curve. Default is 4095.
name (str, optional): String name of the metric instance. Default
is `auc`.
NOTE: only the ROC curve type is currently implemented in Python.
Standalone example:
.. code-block:: python
import numpy as np
import paddle
m = paddle.metric.Auc()
n = 8
class0_preds = np.random.random(size = (n, 1))
class1_preds = 1 - class0_preds
preds = np.concatenate((class0_preds, class1_preds), axis=1)
labels = np.random.randint(2, size = (n, 1))
m.update(preds=preds, labels=labels)
res = m.accumulate()
Example with Model API:
.. code-block:: python
import numpy as np
import paddle
import paddle.nn as nn
class Data(paddle.io.Dataset):
def __init__(self):
super(Data, self).__init__()
self.n = 1024
self.x = np.random.randn(self.n, 10).astype('float32')
self.y = np.random.randint(2, size=(self.n, 1)).astype('int64')
def __getitem__(self, idx):
return self.x[idx], self.y[idx]
def __len__(self):
return self.n
paddle.disable_static()
model = paddle.Model(nn.Sequential(
nn.Linear(10, 2), nn.Softmax())
)
optim = paddle.optimizer.Adam(
learning_rate=0.001, parameters=model.parameters())
def loss(x, y):
return nn.functional.nll_loss(paddle.log(x), y)
model.prepare(
optim,
loss=loss,
metrics=paddle.metric.Auc())
data = Data()
model.fit(data, batch_size=16)
\ No newline at end of file
.. _cn_api_metric_Metric:
Metric
-------------------------------
.. py:class:: paddle.metric.Metric()
Base class for metric, encapsulates metric logic and APIs
Usage:
m = SomeMetric()
for prediction, label in ...:
m.update(prediction, label)
m.accumulate()
Advanced usage for :code:`compute`:
Metric calculation can be accelerated by computing metric states
from model outputs and labels with built-in operators rather than with Python/NumPy
in :code:`compute`; the metric states are then fetched as NumPy arrays and
:code:`update` is called with the states in NumPy format.
The metric is calculated as follows (operations in Model and Metric are
indicated with curly brackets, while data nodes are not):
inputs & labels || ------------------
| ||
{model} ||
| ||
outputs & labels ||
| || tensor data
{Metric.compute} ||
| ||
metric states(tensor) ||
| ||
{fetch as numpy} || ------------------
| ||
metric states(numpy) || numpy data
| ||
{Metric.update} \/ ------------------
Code example
:::::::::
For :code:`Accuracy` metric, which takes :code:`pred` and :code:`label`
as inputs, we can calculate the correct prediction matrix between
:code:`pred` and :code:`label` in :code:`compute`.
For example, if the prediction covers 10 classes, :code:`pred` has
shape [N, 10] and :code:`label` has shape [N, 1], where N is the mini-batch
size, and we only need top-1 and top-5 accuracy, we can
calculate the correct-prediction matrix over the top-5 scores of the
prediction of each sample as follows; the resulting correct-prediction
matrix has shape [N, 5].
.. code-block:: python
def compute(pred, label):
# sort prediction and slice the top-5 scores
pred = paddle.argsort(pred, descending=True)[:, :5]
# calculate whether the predictions are correct
correct = pred == label
return paddle.cast(correct, dtype='float32')
With :code:`compute`, part of the calculation is moved into operators (which
may run on GPU devices and therefore be faster), and only one tensor of
shape [N, 5] is fetched instead of two tensors of shapes [N, 10] and [N, 1].
:code:`update` can be defined as follows:
.. code-block:: python
def update(self, correct):
accs = []
for i, k in enumerate(self.topk):
num_corrects = correct[:, :k].sum()
num_samples = len(correct)
accs.append(float(num_corrects) / num_samples)
self.total[i] += num_corrects
self.count[i] += num_samples
return accs
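The snippets above only show :code:`compute` and :code:`update` in isolation. As a rough end-to-end illustration, the sketch below subclasses :code:`paddle.metric.Metric` directly; the metric itself (``MeanValue``) is an invented example, and it assumes the standard interface of ``name``, ``reset``, ``update`` and ``accumulate`` described in this document.

.. code-block:: python

    import numpy as np
    import paddle
    # Hypothetical metric that tracks the running mean of the values
    # passed to update(); not part of the paddle.metric API.
    class MeanValue(paddle.metric.Metric):
        def __init__(self, name='mean_value'):
            super(MeanValue, self).__init__()
            self._name = name
            self.reset()
        def name(self):
            return self._name
        def reset(self):
            self.total = 0.0
            self.count = 0
        def update(self, values):
            values = np.array(values)
            self.total += float(values.sum())
            self.count += values.size
        def accumulate(self):
            return self.total / max(self.count, 1)
    m = MeanValue()
    m.update([1.0, 2.0, 3.0])
    print(m.accumulate())  # 2.0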
\ No newline at end of file
.. _cn_api_metric_Precision:
Precision
-------------------------------
.. py:class:: paddle.metric.Precision()
Precision (also called positive predictive value) is the fraction of
relevant instances among the retrieved instances. Refer to
https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers
Note that this class manages the precision score only for binary
classification tasks.
Parameters
:::::::::
name (str, optional): String name of the metric instance.
Default is `precision`.
Standalone example:
.. code-block:: python
import numpy as np
import paddle
x = np.array([0.1, 0.5, 0.6, 0.7])
y = np.array([0, 1, 1, 1])
m = paddle.metric.Precision()
m.update(x, y)
res = m.accumulate()
print(res) # 1.0
Example with Model API:
.. code-block:: python
import numpy as np
import paddle
import paddle.nn as nn
class Data(paddle.io.Dataset):
def __init__(self):
super(Data, self).__init__()
self.n = 1024
self.x = np.random.randn(self.n, 10).astype('float32')
self.y = np.random.randint(2, size=(self.n, 1)).astype('float32')
def __getitem__(self, idx):
return self.x[idx], self.y[idx]
def __len__(self):
return self.n
paddle.disable_static()
model = paddle.Model(nn.Sequential(
nn.Linear(10, 1),
nn.Sigmoid()
))
optim = paddle.optimizer.Adam(
learning_rate=0.001, parameters=model.parameters())
model.prepare(
optim,
loss=nn.BCELoss(),
metrics=paddle.metric.Precision())
data = Data()
model.fit(data, batch_size=16)
\ No newline at end of file
.. _cn_api_metric_Recall:
Recall
-------------------------------
.. py:class:: paddle.metric.Recall()
Recall (also known as sensitivity) is the fraction of
relevant instances that have been retrieved over the
total number of relevant instances. Refer to
https://en.wikipedia.org/wiki/Precision_and_recall.
Note that this class manages the recall score only for
binary classification tasks.
Parameters
:::::::::
name (str, optional): String name of the metric instance.
Default is `recall`.
Standalone example:
.. code-block:: python
import numpy as np
import paddle
x = np.array([0.1, 0.5, 0.6, 0.7])
y = np.array([1, 0, 1, 1])
m = paddle.metric.Recall()
m.update(x, y)
res = m.accumulate()
print(res) # 2.0 / 3.0
Example with Model API:
.. code-block:: python
import numpy as np
import paddle
import paddle.nn as nn
class Data(paddle.io.Dataset):
def __init__(self):
super(Data, self).__init__()
self.n = 1024
self.x = np.random.randn(self.n, 10).astype('float32')
self.y = np.random.randint(2, size=(self.n, 1)).astype('float32')
def __getitem__(self, idx):
return self.x[idx], self.y[idx]
def __len__(self):
return self.n
paddle.disable_static()
model = paddle.Model(nn.Sequential(
nn.Linear(10, 1),
nn.Sigmoid()
))
optim = paddle.optimizer.Adam(
learning_rate=0.001, parameters=model.parameters())
model.prepare(
optim,
loss=nn.BCELoss(),
metrics=[paddle.metric.Precision(), paddle.metric.Recall()])
data = Data()
model.fit(data, batch_size=16)
\ No newline at end of file
.. _cn_api_text_datasets_Conll05st:
Conll05st
-------------------------------
.. py:class:: paddle.text.datasets.Conll05st()
Implementation of `Conll05st <https://www.cs.upc.edu/~srlconll/soft.html>`_
test dataset.
Note: only the test dataset can be downloaded automatically, because only
the test split of Conll05st is public.
Parameters
:::::::::
data_file(str): path to data tar file, can be set None if
:attr:`download` is True. Default None
word_dict_file(str): path to word dictionary file, can be set None if
:attr:`download` is True. Default None
verb_dict_file(str): path to verb dictionary file, can be set None if
:attr:`download` is True. Default None
target_dict_file(str): path to target dictionary file, can be set None if
:attr:`download` is True. Default None
emb_file(str): path to embedding dictionary file, only used for
:code:`get_embedding` can be set None if :attr:`download` is
True. Default None
download(bool): whether to download dataset automatically if
:attr:`data_file` :attr:`word_dict_file` :attr:`verb_dict_file`
:attr:`target_dict_file` is not set. Default True
Returns:
Dataset: instance of conll05st dataset
Code example
:::::::::
.. code-block:: python
import paddle
from paddle.text.datasets import Conll05st
class SimpleNet(paddle.nn.Layer):
def __init__(self):
super(SimpleNet, self).__init__()
def forward(self, pred_idx, mark, label):
return paddle.sum(pred_idx), paddle.sum(mark), paddle.sum(label)
paddle.disable_static()
conll05st = Conll05st()
for i in range(10):
pred_idx, mark, label= conll05st[i][-3:]
pred_idx = paddle.to_tensor(pred_idx)
mark = paddle.to_tensor(mark)
label = paddle.to_tensor(label)
model = SimpleNet()
pred_idx, mark, label= model(pred_idx, mark, label)
print(pred_idx.numpy(), mark.numpy(), label.numpy())
\ No newline at end of file
.. _cn_api_text_datasets_Imdb:
Imdb
-------------------------------
.. py:class:: paddle.text.datasets.Imdb()
Implementation of `IMDB <https://www.imdb.com/interfaces/>`_ dataset.
Parameters
:::::::::
data_file(str): path to data tar file, can be set None if
:attr:`download` is True. Default None
mode(str): 'train' or 'test' mode. Default 'train'.
cutoff(int): cutoff number for building word dictionary. Default 150.
download(bool): whether to download dataset automatically if
:attr:`data_file` is not set. Default True
Returns:
Dataset: instance of IMDB dataset
Code example
:::::::::
.. code-block:: python
import paddle
from paddle.text.datasets import Imdb
class SimpleNet(paddle.nn.Layer):
def __init__(self):
super(SimpleNet, self).__init__()
def forward(self, doc, label):
return paddle.sum(doc), label
paddle.disable_static()
imdb = Imdb(mode='train')
for i in range(10):
doc, label = imdb[i]
doc = paddle.to_tensor(doc)
label = paddle.to_tensor(label)
model = SimpleNet()
image, label = model(doc, label)
print(doc.numpy().shape, label.numpy().shape)
\ No newline at end of file
.. _cn_api_text_datasets_Imikolov:
Imikolov
-------------------------------
.. py:class:: paddle.text.datasets.Imikolov()
Implementation of imikolov dataset.
Parameters
:::::::::
data_file(str): path to data tar file, can be set None if
:attr:`download` is True. Default None
data_type(str): 'NGRAM' or 'SEQ'. Default 'NGRAM'.
window_size(int): sliding window size for 'NGRAM' data. Default -1.
mode(str): 'train' or 'test' mode. Default 'train'.
min_word_freq(int): minimum word frequency for building the word dictionary. Default 50.
download(bool): whether to download dataset automatically if
:attr:`data_file` is not set. Default True
Returns:
Dataset: instance of imikolov dataset
Code example
:::::::::
.. code-block:: python
import paddle
from paddle.text.datasets import Imikolov
class SimpleNet(paddle.nn.Layer):
def __init__(self):
super(SimpleNet, self).__init__()
def forward(self, src, trg):
return paddle.sum(src), paddle.sum(trg)
paddle.disable_static()
imikolov = Imikolov(mode='train', data_type='SEQ', window_size=2)
for i in range(10):
src, trg = imikolov[i]
src = paddle.to_tensor(src)
trg = paddle.to_tensor(trg)
model = SimpleNet()
src, trg = model(src, trg)
print(src.numpy().shape, trg.numpy().shape)
\ No newline at end of file
.. _cn_api_text_datasets_MovieReviews:
MovieReviews
-------------------------------
.. py:class:: paddle.text.datasets.MovieReviews()
Implementation of `NLTK movie reviews <http://www.nltk.org/nltk_data/>`_ dataset.
Parameters
:::::::::
data_file(str): path to data tar file, can be set None if
:attr:`download` is True. Default None
mode(str): 'train' or 'test' mode. Default 'train'.
download(bool): whether to download the dataset automatically if
:attr:`data_file` is not set. Default True.
Returns:
Dataset: instance of movie reviews dataset
Code example
:::::::::
.. code-block:: python
import paddle
from paddle.text.datasets import MovieReviews
class SimpleNet(paddle.nn.Layer):
def __init__(self):
super(SimpleNet, self).__init__()
def forward(self, word, category):
return paddle.sum(word), category
paddle.disable_static()
movie_reviews = MovieReviews(mode='train')
for i in range(10):
word_list, category = movie_reviews[i]
word_list = paddle.to_tensor(word_list)
category = paddle.to_tensor(category)
model = SimpleNet()
word_list, category = model(word_list, category)
print(word_list.numpy().shape, category.numpy())
\ No newline at end of file
.. _cn_api_text_datasets_Movielens:
Movielens
-------------------------------
.. py:class:: paddle.text.datasets.Movielens()
Implementation of `Movielens 1-M <https://grouplens.org/datasets/movielens/1m/>`_ dataset.
Parameters
:::::::::
data_file(str): path to data tar file, can be set None if
:attr:`download` is True. Default None
mode(str): 'train' or 'test' mode. Default 'train'.
test_ratio(float): split ratio for test sample. Default 0.1.
rand_seed(int): random seed. Default 0.
download(bool): whether to download dataset automatically if
:attr:`data_file` is not set. Default True
Returns:
Dataset: instance of Movielens 1-M dataset
Code example
:::::::::
.. code-block:: python
import paddle
from paddle.text.datasets import Movielens
class SimpleNet(paddle.nn.Layer):
def __init__(self):
super(SimpleNet, self).__init__()
def forward(self, category, title, rating):
return paddle.sum(category), paddle.sum(title), paddle.sum(rating)
paddle.disable_static()
movielens = Movielens(mode='train')
for i in range(10):
category, title, rating = movielens[i][-3:]
category = paddle.to_tensor(category)
title = paddle.to_tensor(title)
rating = paddle.to_tensor(rating)
model = SimpleNet()
category, title, rating = model(category, title, rating)
print(category.numpy().shape, title.numpy().shape, rating.numpy().shape)
\ No newline at end of file
.. _cn_api_text_datasets_UCIHousing:
UCIHousing
-------------------------------
.. py:class:: paddle.text.datasets.UCIHousing()
Implementation of `UCI housing <https://archive.ics.uci.edu/ml/datasets/Housing>`_
dataset
Parameters
:::::::::
data_file(str): path to data file, can be set None if
:attr:`download` is True. Default None
mode(str): 'train' or 'test' mode. Default 'train'.
download(bool): whether to download dataset automatically if
:attr:`data_file` is not set. Default True
Returns:
Dataset: instance of UCI housing dataset.
Code example
:::::::::
.. code-block:: python
import paddle
from paddle.text.datasets import UCIHousing
class SimpleNet(paddle.nn.Layer):
def __init__(self):
super(SimpleNet, self).__init__()
def forward(self, feature, target):
return paddle.sum(feature), target
paddle.disable_static()
uci_housing = UCIHousing(mode='train')
for i in range(10):
feature, target = uci_housing[i]
feature = paddle.to_tensor(feature)
target = paddle.to_tensor(target)
model = SimpleNet()
feature, target = model(feature, target)
print(feature.numpy().shape, target.numpy())
\ No newline at end of file
.. _cn_api_text_datasets_WMT14:
WMT14
-------------------------------
.. py:class:: paddle.text.datasets.WMT14()
Implementation of `WMT14 <http://www.statmt.org/wmt14/>`_ test dataset.
The original WMT14 dataset is too large, so a shrunken subset is
provided instead. This module will download the dataset from
http://paddlepaddle.bj.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz
Parameters
:::::::::
data_file(str): path to data tar file, can be set None if
:attr:`download` is True. Default None
mode(str): 'train', 'test' or 'gen'. Default 'train'
dict_size(int): word dictionary size. Default -1.
download(bool): whether to download dataset automatically if
:attr:`data_file` is not set. Default True
Returns:
Dataset: instance of WMT14 dataset
Code example
:::::::::
.. code-block:: python
import paddle
from paddle.text.datasets import WMT14
class SimpleNet(paddle.nn.Layer):
def __init__(self):
super(SimpleNet, self).__init__()
def forward(self, src_ids, trg_ids, trg_ids_next):
return paddle.sum(src_ids), paddle.sum(trg_ids), paddle.sum(trg_ids_next)
paddle.disable_static()
wmt14 = WMT14(mode='train', dict_size=50)
for i in range(10):
src_ids, trg_ids, trg_ids_next = wmt14[i]
src_ids = paddle.to_tensor(src_ids)
trg_ids = paddle.to_tensor(trg_ids)
trg_ids_next = paddle.to_tensor(trg_ids_next)
model = SimpleNet()
src_ids, trg_ids, trg_ids_next = model(src_ids, trg_ids, trg_ids_next)
print(src_ids.numpy(), trg_ids.numpy(), trg_ids_next.numpy())
\ No newline at end of file
.. _cn_api_text_datasets_WMT16:
WMT16
-------------------------------
.. py:class:: paddle.text.datasets.WMT16()
Implementation of `WMT16 <http://www.statmt.org/wmt16/>`_ test dataset.
ACL2016 Multimodal Machine Translation. Please see this website for more
details: http://www.statmt.org/wmt16/multimodal-task.html#task1
If you use the dataset for your task, please cite the following paper:
Multi30K: Multilingual English-German Image Descriptions.
.. code-block:: text
@article{elliott-EtAl:2016:VL16,
author = {{Elliott}, D. and {Frank}, S. and {Sima'an}, K. and {Specia}, L.},
title = {Multi30K: Multilingual English-German Image Descriptions},
booktitle = {Proceedings of the 6th Workshop on Vision and Language},
year = {2016},
pages = {70--74}
}
Parameters
:::::::::
data_file(str): path to data tar file, can be set None if
:attr:`download` is True. Default None
mode(str): 'train', 'test' or 'val'. Default 'train'
src_dict_size(int): word dictionary size for source language word. Default -1.
trg_dict_size(int): word dictionary size for target language word. Default -1.
lang(str): source language, 'en' or 'de'. Default 'en'.
download(bool): whether to download dataset automatically if
:attr:`data_file` is not set. Default True
Returns:
Dataset: instance of WMT16 dataset
Code example
:::::::::
.. code-block:: python
import paddle
from paddle.text.datasets import WMT16
class SimpleNet(paddle.nn.Layer):
def __init__(self):
super(SimpleNet, self).__init__()
def forward(self, src_ids, trg_ids, trg_ids_next):
return paddle.sum(src_ids), paddle.sum(trg_ids), paddle.sum(trg_ids_next)
paddle.disable_static()
wmt16 = WMT16(mode='train', src_dict_size=50, trg_dict_size=50)
for i in range(10):
src_ids, trg_ids, trg_ids_next = wmt16[i]
src_ids = paddle.to_tensor(src_ids)
trg_ids = paddle.to_tensor(trg_ids)
trg_ids_next = paddle.to_tensor(trg_ids_next)
model = SimpleNet()
src_ids, trg_ids, trg_ids_next = model(src_ids, trg_ids, trg_ids_next)
print(src_ids.numpy(), trg_ids.numpy(), trg_ids_next.numpy())
\ No newline at end of file
.. _cn_api_vision_datasets_Cifar100:
Cifar100
-------------------------------
.. py:class:: paddle.vision.datasets.Cifar100()
Implementation of `Cifar-100 <https://www.cs.toronto.edu/~kriz/cifar.html>`_
dataset, which has 100 categories.
Parameters
:::::::::
data_file(str): path to data file, can be set None if
:attr:`download` is True. Default None
mode(str): 'train' or 'test' mode. Default 'train'.
transform(callable): transform to perform on image, None for no transform.
download(bool): whether to download dataset automatically if
:attr:`data_file` is not set. Default True
Returns:
Dataset: instance of cifar-100 dataset
Code example
:::::::::
.. code-block:: python
import paddle
import paddle.nn as nn
from paddle.vision.datasets import Cifar100
from paddle.vision.transforms import Normalize
class SimpleNet(paddle.nn.Layer):
def __init__(self):
super(SimpleNet, self).__init__()
self.fc = nn.Sequential(
nn.Linear(3072, 10),
nn.Softmax())
def forward(self, image, label):
image = paddle.reshape(image, (1, -1))
return self.fc(image), label
paddle.disable_static()
normalize = Normalize(mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5])
cifar100 = Cifar100(mode='train', transform=normalize)
for i in range(10):
image, label = cifar100[i]
image = paddle.to_tensor(image)
label = paddle.to_tensor(label)
model = SimpleNet()
image, label = model(image, label)
print(image.numpy().shape, label.numpy().shape)
\ No newline at end of file
.. _cn_api_vision_datasets_Cifar10:
Cifar10
-------------------------------
.. py:class:: paddle.vision.datasets.Cifar10()
Implementation of `Cifar-10 <https://www.cs.toronto.edu/~kriz/cifar.html>`_
dataset, which has 10 categories.
Parameters
:::::::::
data_file(str): path to data file, can be set None if
:attr:`download` is True. Default None
mode(str): 'train' or 'test' mode. Default 'train'.
transform(callable): transform to perform on image, None for no transform.
download(bool): whether to download dataset automatically if
:attr:`data_file` is not set. Default True
Returns:
Dataset: instance of cifar-10 dataset
Code example
:::::::::
.. code-block:: python
import paddle
import paddle.nn as nn
from paddle.vision.datasets import Cifar10
from paddle.vision.transforms import Normalize
class SimpleNet(paddle.nn.Layer):
def __init__(self):
super(SimpleNet, self).__init__()
self.fc = nn.Sequential(
nn.Linear(3072, 10),
nn.Softmax())
def forward(self, image, label):
image = paddle.reshape(image, (1, -1))
return self.fc(image), label
paddle.disable_static()
normalize = Normalize(mean=[0.5, 0.5, 0.5],
std=[0.5, 0.5, 0.5])
cifar10 = Cifar10(mode='train', transform=normalize)
for i in range(10):
image, label = cifar10[i]
image = paddle.to_tensor(image)
label = paddle.to_tensor(label)
model = SimpleNet()
image, label = model(image, label)
print(image.numpy().shape, label.numpy().shape)
\ No newline at end of file
.. _cn_api_vision_datasets_Flowers:
Flowers
-------------------------------
.. py:class:: paddle.vision.datasets.Flowers()
Implementation of `Flowers <https://www.robots.ox.ac.uk/~vgg/data/flowers/>`_
dataset
Parameters
:::::::::
data_file(str): path to data file, can be set None if
:attr:`download` is True. Default None
label_file(str): path to label file, can be set None if
:attr:`download` is True. Default None
setid_file(str): path to subset index file, can be set
None if :attr:`download` is True. Default None
mode(str): 'train', 'valid' or 'test' mode. Default 'train'.
transform(callable): transform to perform on image, None for no transform.
download(bool): whether to download dataset automatically if
:attr:`data_file` is not set. Default True
Code example
:::::::::
.. code-block:: python
from paddle.vision.datasets import Flowers
flowers = Flowers(mode='test')
for i in range(len(flowers)):
sample = flowers[i]
print(sample[0].shape, sample[1])
\ No newline at end of file
.. _cn_api_vision_datasets_MNIST:
MNIST
-------------------------------
.. py:class:: paddle.vision.datasets.MNIST()
Implementation of `MNIST <http://yann.lecun.com/exdb/mnist/>`_ dataset
Parameters
:::::::::
image_path(str): path to image file, can be set None if
:attr:`download` is True. Default None
label_path(str): path to label file, can be set None if
:attr:`download` is True. Default None
chw_format(bool): If set True, the output shape is [1, 28, 28],
otherwise, output shape is [1, 784]. Default True.
mode(str): 'train' or 'test' mode. Default 'train'.
download(bool): whether to download dataset automatically if
:attr:`image_path` :attr:`label_path` is not set. Default True
Returns:
Dataset: MNIST Dataset.
Code example
:::::::::
.. code-block:: python
from paddle.vision.datasets import MNIST
mnist = MNIST(mode='test')
for i in range(len(mnist)):
sample = mnist[i]
print(sample[0].shape, sample[1])
\ No newline at end of file
.. _cn_api_vision_datasets_VOC2012:
VOC2012
-------------------------------
.. py:class:: paddle.vision.datasets.VOC2012()
Implementation of `VOC2012 <http://host.robots.ox.ac.uk/pascal/VOC/voc2012/>`_ dataset
Parameters
:::::::::
data_file(str): path to data file, can be set None if
:attr:`download` is True. Default None
mode(str): 'train', 'valid' or 'test' mode. Default 'train'.
download(bool): whether to download dataset automatically if
:attr:`data_file` is not set. Default True
Code example
:::::::::
.. code-block:: python
import paddle
from paddle.vision.datasets import VOC2012
class SimpleNet(paddle.nn.Layer):
def __init__(self):
super(SimpleNet, self).__init__()
def forward(self, image, label):
return paddle.sum(image), label
paddle.disable_static()
voc2012 = VOC2012(mode='train')
for i in range(10):
image, label= voc2012[i]
image = paddle.cast(paddle.to_tensor(image), 'float32')
label = paddle.to_tensor(label)
model = SimpleNet()
image, label= model(image, label)
print(image.numpy().shape, label.numpy().shape)
\ No newline at end of file