diff --git a/develop/doc/api/v1/data_provider/pydataprovider2_en.html b/develop/doc/api/v1/data_provider/pydataprovider2_en.html index d9b11178c0c18ebf1bdedfa205b1b35a6d60f658..37d2038780ebee2ba4ee2c00ff8c9163b7f58e4a 100644 --- a/develop/doc/api/v1/data_provider/pydataprovider2_en.html +++ b/develop/doc/api/v1/data_provider/pydataprovider2_en.html @@ -239,19 +239,20 @@ label of an image. The second part contains 28x28 pixel float values.
The corresponding dataprovider is shown as below:
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
from paddle.trainer.PyDataProvider2 import *
@@ -307,19 +308,20 @@ sample by using keywords y
generator
.
Only a few lines of codes need to be added into the training configuration file,
you can take this as an example.
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
from paddle.trainer_config_helpers import *
define_py_data_sources2(
@@ -373,19 +375,21 @@ the dataprovider
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+37
+38
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
from paddle.trainer.PyDataProvider2 import *
@@ -447,19 +451,20 @@ negative sentiment (marked by 0 and 1 respectively).
The corresponding data provider can be found in the path below:
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
from paddle.trainer.PyDataProvider2 import *
@@ -520,19 +525,20 @@ configuration file, and it maps word string to word id.
To pass these parameters into DataProvider, the following lines should be added
into trainer configuration file.
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
from paddle.trainer_config_helpers import *
dictionary = dict()
diff --git a/develop/doc_cn/api/v1/data_provider/pydataprovider2_cn.html b/develop/doc_cn/api/v1/data_provider/pydataprovider2_cn.html
index 963514864ee3dfb74e49cde6df9bad14f0861a85..3967fca772a765071105641f10bd5e39b4b4eff8 100644
--- a/develop/doc_cn/api/v1/data_provider/pydataprovider2_cn.html
+++ b/develop/doc_cn/api/v1/data_provider/pydataprovider2_cn.html
@@ -282,19 +282,20 @@
dataprovider的使用¶
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
from paddle.trainer.PyDataProvider2 import *
@@ -327,8 +328,8 @@
其次,定义一个Python的 Decorator @provider 。用于将下一行的数据输入函数标记成一个PyDataProvider2,同时设置它的input_types属性。
input_types:设置这个PyDataProvider2返回什么样的数据。本例根据网络配置中 data_layer
的名字,显式指定返回的是一个28*28维的稠密浮点数向量和一个[0-9]的10维整数标签。
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
@@ -356,13 +357,13 @@
网络配置中的调用¶
在网络配置里,只需要一行代码就可以调用这个PyDataProvider2,如,
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
训练数据是 train.list
,没有测试数据,调用的PyDataProvider2是 mnist_provider
模块中的 process
函数。
@@ -399,19 +400,20 @@
- 其中
input_types
和在 @provider 中配置的效果一致。本例中的输入特征是词ID的序列,因此使用 integer_value_sequence
类型来设置。
- 将
dictionary
存入settings对象,在 process
函数中使用。 dictionary是从网络配置中传入的dict对象,即一个将单词字符串映射到单词ID的字典。
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
from paddle.trainer.PyDataProvider2 import *
@@ -465,21 +467,22 @@
- 在配置中需要读取外部字典。
- 在声明DataProvider的时候传入dictionary作为参数。
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-from paddle.trainer_config_helpers import *
-
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
dictionary = dict()
... # read dictionary from outside
diff --git a/develop/doc_cn/faq/local/index_cn.html b/develop/doc_cn/faq/local/index_cn.html
index 409448eeeb66e3c154137d07235a01b13453afcc..0bf82bbb6f59bc36478494b3bd5d42d6b0f586dd 100644
--- a/develop/doc_cn/faq/local/index_cn.html
+++ b/develop/doc_cn/faq/local/index_cn.html
@@ -279,19 +279,21 @@ PaddlePaddle的内存占用主要分为如下几个方面:
所以,减小这个内存池即可减小内存占用,同时也可以加速开始训练前数据载入的过程。但是,这
个内存池实际上决定了shuffle的粒度。所以,如果将这个内存池减小,又要保证数据是随机的,
那么最好将数据文件在每次读取之前做一次shuffle。可能的代码为
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
@provider(min_pool_size=0, ...)
def process(settings, filename):
os.system('shuf %s > %s.shuf' % (filename, filename)) # shuffle before.
@@ -335,19 +337,21 @@ PaddlePaddle的内存占用主要分为如下几个方面:
减少数据载入的耗时¶
使用pydataprovider
时,可以减少缓存池的大小,同时设置内存缓存功能,即可以极大的加速数据载入流程。
DataProvider
缓存池的减小,和之前减小通过减小缓存池来减小内存占用的原理一致。
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
@provider(min_pool_size=0, ...)
def process(settings, filename):
os.system('shuf %s > %s.shuf' % (filename, filename)) # shuffle before.
@@ -363,19 +367,20 @@ PaddlePaddle的内存占用主要分为如下几个方面:
PaddlePaddle支持Sparse的训练,sparse训练需要训练特征是 sparse_binary_vector
、 sparse_vector
、或者 integer_value
的任一一种。同时,与这个训练数据交互的Layer,需要将其Parameter设置成 sparse 更新模式,即设置 sparse_update=True
这里使用简单的 word2vec
训练语言模型距离,具体使用方法为:
使用一个词前两个词和后两个词,来预测这个中间的词。这个任务的DataProvider为:
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
DICT_DIM = 3000
@@ -389,19 +394,20 @@ PaddlePaddle的内存占用主要分为如下几个方面:
这个任务的配置为:
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
... # the settings and define data provider is omitted.
DICT_DIM = 3000 # dictionary dimension.
word_ids = data_layer('word_ids', size=DICT_DIM)
diff --git a/develop/doc_cn/getstarted/concepts/use_concepts_cn.html b/develop/doc_cn/getstarted/concepts/use_concepts_cn.html
index db8882924dd6319f1c683ef62da7a30a55103520..9329aa999d94c02ce51898ec68b2d3dcfdb88477 100644
--- a/develop/doc_cn/getstarted/concepts/use_concepts_cn.html
+++ b/develop/doc_cn/getstarted/concepts/use_concepts_cn.html
@@ -442,19 +442,21 @@ trainer.train(
67
68
69
-70
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+70
+71
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
import paddle.v2 as paddle
import numpy as np
@@ -545,19 +547,21 @@ trainer.train(
28
29
30
-31
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+31
+32
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
import paddle.v2 as paddle
import numpy as np