data_dispatch.html 25.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64


<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  <title>训练数据的存储和分发 &mdash; PaddlePaddle  文档</title>
  

  
  

  

  
  
    

  

  
  
    <link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
  

  
  
        <link rel="index" title="索引"
              href="../../genindex.html"/>
        <link rel="search" title="搜索" href="../../search.html"/>
    <link rel="top" title="PaddlePaddle  文档" href="../../index.html"/> 

  <link rel="stylesheet" href="https://cdn.jsdelivr.net/perfect-scrollbar/0.6.14/css/perfect-scrollbar.min.css" type="text/css" />
  <link rel="stylesheet" href="../../_static/css/override.css" type="text/css" />
  <script>
  var _hmt = _hmt || [];
  (function() {
    var hm = document.createElement("script");
    hm.src = "//hm.baidu.com/hm.js?b9a314ab40d04d805655aab1deee08ba";
    var s = document.getElementsByTagName("script")[0]; 
    s.parentNode.insertBefore(hm, s);
  })();
  </script>

  

  
  <script src="../../_static/js/modernizr.min.js"></script>

</head>

<body class="wy-body-for-nav" role="document">

  
  <header class="site-header">
    <div class="site-logo">
      <a href="/"><img src="../../_static/images/PP_w.png"></a>
    </div>
    <div class="site-nav-links">
      <div class="site-menu">
65
        <a class="fork-on-github" href="https://github.com/PaddlePaddle/Paddle" target="_blank"><i class="fa fa-github"></i>Fork me on Github</a>
66 67 68 69 70 71 72 73 74 75 76 77
        <div class="language-switcher dropdown">
          <a type="button" data-toggle="dropdown">
            <span>English</span>
            <i class="fa fa-angle-up"></i>
            <i class="fa fa-angle-down"></i>
          </a>
          <ul class="dropdown-menu">
            <li><a href="/doc_cn">中文</a></li>
            <li><a href="/doc">English</a></li>
          </ul>
        </div>
        <ul class="site-page-links">
78
          <li><a href="/">Home</a></li>
79 80 81 82 83 84 85 86 87
        </ul>
      </div>
      <div class="doc-module">
        
        <ul>
<li class="toctree-l1"><a class="reference internal" href="../../getstarted/index_cn.html">新手入门</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../howto/index_cn.html">进阶指南</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../api/index_cn.html">API</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../faq/index_cn.html">FAQ</a></li>
88
<li class="toctree-l1"><a class="reference internal" href="../../mobile/index_cn.html">MOBILE</a></li>
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
</ul>

        
<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>        
      </div>
    </div>
  </header>
  
  <div class="main-content-wrap">

    
    <nav class="doc-menu-vertical" role="navigation">
        
          
          <ul>
<li class="toctree-l1"><a class="reference internal" href="../../getstarted/index_cn.html">新手入门</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../getstarted/build_and_install/index_cn.html">安装与编译</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../getstarted/build_and_install/docker_install_cn.html">PaddlePaddle的Docker容器使用方式</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../getstarted/build_and_install/cmake/build_from_source_cn.html">PaddlePaddle的编译选项</a></li>
</ul>
</li>
116
<li class="toctree-l2"><a class="reference internal" href="../../getstarted/concepts/use_concepts_cn.html">基本使用概念</a></li>
117 118 119 120 121 122 123 124 125
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../howto/index_cn.html">进阶指南</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../howto/usage/cmd_parameter/index_cn.html">设置命令行参数</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../howto/usage/cmd_parameter/use_case_cn.html">使用案例</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../howto/usage/cmd_parameter/arguments_cn.html">参数概述</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../howto/usage/cmd_parameter/detail_introduction_cn.html">细节描述</a></li>
</ul>
</li>
126
<li class="toctree-l2"><a class="reference internal" href="../../howto/usage/cluster/cluster_train_cn.html">PaddlePaddle分布式训练</a></li>
127 128 129
<li class="toctree-l2"><a class="reference internal" href="../../howto/usage/k8s/k8s_basis_cn.html">Kubernetes 简介</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../howto/usage/k8s/k8s_cn.html">Kubernetes单机训练</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../howto/usage/k8s/k8s_distributed_cn.html">Kubernetes分布式训练</a></li>
130
<li class="toctree-l2"><a class="reference internal" href="../../howto/dev/build_cn.html">编译PaddlePaddle和运行单元测试</a></li>
131 132
<li class="toctree-l2"><a class="reference internal" href="../../howto/dev/write_docs_cn.html">如何贡献/修改文档</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../howto/deep_model/rnn/index_cn.html">RNN相关模型</a><ul>
133
<li class="toctree-l3"><a class="reference internal" href="../../howto/deep_model/rnn/rnn_config_cn.html">RNN配置</a></li>
134 135 136 137 138 139 140 141 142 143 144 145
<li class="toctree-l3"><a class="reference internal" href="../../howto/deep_model/rnn/recurrent_group_cn.html">Recurrent Group教程</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../howto/deep_model/rnn/hierarchical_layer_cn.html">支持双层序列作为输入的Layer</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../howto/deep_model/rnn/hrnn_rnn_api_compare_cn.html">单双层RNN API对比介绍</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../howto/optimization/gpu_profiling_cn.html">GPU性能分析与调优</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../api/index_cn.html">API</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../api/v2/model_configs.html">模型配置</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/activation.html">Activation</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/layer.html">Layers</a></li>
146
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/evaluators.html">Evaluators</a></li>
147 148 149 150 151 152
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/optimizer.html">Optimizer</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/pooling.html">Pooling</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/networks.html">Networks</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/attr.html">Parameter Attribute</a></li>
</ul>
</li>
153 154 155 156 157 158
<li class="toctree-l2"><a class="reference internal" href="../../api/v2/data.html">数据访问</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/data/data_reader.html">Data Reader Interface</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/data/image.html">Image Interface</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/data/dataset.html">Dataset</a></li>
</ul>
</li>
159 160 161
<li class="toctree-l2"><a class="reference internal" href="../../api/v2/run_logic.html">训练与应用</a></li>
</ul>
</li>
162 163 164 165 166 167 168 169
<li class="toctree-l1"><a class="reference internal" href="../../faq/index_cn.html">FAQ</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../faq/build_and_install/index_cn.html">编译安装与单元测试</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../faq/model/index_cn.html">模型配置</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../faq/parameter/index_cn.html">参数设置</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../faq/local/index_cn.html">本地训练与预测</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../faq/cluster/index_cn.html">集群训练与预测</a></li>
</ul>
</li>
170 171 172 173 174 175
<li class="toctree-l1"><a class="reference internal" href="../../mobile/index_cn.html">MOBILE</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../mobile/cross_compiling_for_android_cn.html">构建Android平台上的PaddlePaddle库</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../mobile/cross_compiling_for_ios_cn.html">构建iOS平台上的PaddlePaddle库</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../mobile/cross_compiling_for_raspberry_cn.html">构建Raspberry Pi平台上的PaddlePaddle库</a></li>
</ul>
</li>
176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
</ul>

        
    </nav>
    
    <section class="doc-content-wrap">

      

 







<div role="navigation" aria-label="breadcrumbs navigation">
  <ul class="wy-breadcrumbs">
      
    <li>训练数据的存储和分发</li>
  </ul>
</div>
      
      <div class="wy-nav-content" id="doc-content">
        <div class="rst-content">
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
            
  <div class="section" id="">
<span id="id1"></span><h1>训练数据的存储和分发<a class="headerlink" href="#" title="永久链接至标题"></a></h1>
<div class="section" id="">
208 209 210 211
<span id="id2"></span><h2>概念解释<a class="headerlink" href="#" title="永久链接至标题"></a></h2>
</div>
<div class="section" id="">
<span id="id3"></span><h2>流程介绍<a class="headerlink" href="#" title="永久链接至标题"></a></h2>
212 213 214 215 216 217
<p>生产环境中的训练数据集通常体积很大,并被存储在诸如Hadoop HDFS,Ceph,AWS S3之类的分布式存储之上。这些分布式存储服务通常会把数据切割成多个分片分布式的存储在多个节点之上。这样就可以在云端执行多种数据类计算任务,包括:</p>
<ul class="simple">
<li>数据预处理任务</li>
<li>Paddle训练任务</li>
<li>在线模型预测服务</li>
</ul>
218 219 220
<div style="align: center">
<img src="src/paddle-cloud-in-data-center.png" width="800"/>
</div><p>在上图中显示了在一个实际生产环境中的应用(人脸识别)的数据流图。生产环境的日志数据会通过实时流的方式(Kafka)和离线数据的方式(HDFS)存储,并在集群中运行多个分布式数据处理任务,比如流式数据处理(online data process),离线批处理(offline data process)完成数据的预处理,提供给paddle作为训练数据。用户也可以上传labeled data到分布式存储补充训练数据。在paddle之上运行的深度学习训练输出的模型会提供给在线人脸识别的应用使用。</p>
221 222
</div>
<div class="section" id="">
223 224 225
<span id="id4"></span><h2>训练数据存储<a class="headerlink" href="#" title="永久链接至标题"></a></h2>
<p>我们选择<a class="reference external" href="http://docs.ceph.com/docs/master/cephfs/">CephFS</a>作为存储系统。</p>
<ul class="simple">
226
<li>无论是从<a class="reference internal" href="../file_manager/README.html"><span class="doc">PFSClient</span></a>的角度,还是从<a class="reference external" href="https://kubernetes.io/docs/concepts/workloads/pods/pod/">Pod</a>中运行任务的角度,统一用<code class="docutils literal"><span class="pre">/pfs/$DATACENTER/home/$USER</span></code>来访问用户自己的数据。</li>
227 228 229 230 231 232 233 234
<li><code class="docutils literal"><span class="pre">/pfs/$DATACENTER/common</span></code>下存放公共数据集合<ul>
<li>做只读挂载</li>
</ul>
</li>
</ul>
<div style="align: center">
<img src="src/file_storage.png" width="700" align=center/>
</div></div>
235
<div class="section" id="">
236 237
<span id="id5"></span><h2>文件预处理<a class="headerlink" href="#" title="永久链接至标题"></a></h2>
<p>在开始训练之前, 数据集需要预先被转换成PaddlePaddle分布式训练使用的存储格<a class="reference external" href="https://github.com/PaddlePaddle/Paddle/issues/1947">RecordIO</a>。我们提供两个转换方式:</p>
238 239 240 241
<ol class="simple">
<li>用户在本地转换好再上传</li>
<li>用户上传数据后,在机群上运行转换程序</li>
</ol>
242 243
<p>转换生成的文件名会是以下格式:</p>
<div class="highlight-text"><div class="highlight"><pre><span></span>name_prefix-aaaaa-of-bbbbb
244 245
</pre></div>
</div>
246 247 248 249 250 251
<p>&#8220;aaaaa&#8221;&#8221;bbbbb&#8221;都是五位的数字,每一个文件是数据集的一个shard,&#8221;aaaaa&#8221;代表shard的index,&#8221;bbbbb&#8221;代表这个shard的最大index。</p>
<p>比如ImageNet这个数据集可能被分成1000个shard,它们的文件名是:</p>
<div class="highlight-text"><div class="highlight"><pre><span></span>imagenet-00000-of-00999
imagenet-00001-of-00999
...
imagenet-00999-of-00999
252 253
</pre></div>
</div>
254
<div class="section" id="">
255
<span id="id6"></span><h3>转换库<a class="headerlink" href="#" title="永久链接至标题"></a></h3>
256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
<p>无论是在本地或是云端转换,我们都提供Python的转换库,接口是:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">convert</span><span class="p">(</span><span class="n">output_path</span><span class="p">,</span> <span class="n">reader</span><span class="p">,</span> <span class="n">num_shards</span><span class="p">,</span> <span class="n">name_prefix</span><span class="p">)</span>
</pre></div>
</div>
<ul class="simple">
<li><code class="docutils literal"><span class="pre">output_path</span></code>: directory in which output files will be saved.</li>
<li><code class="docutils literal"><span class="pre">reader</span></code>: a <a class="reference external" href="https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/reader/README.md#data-reader-interface">data reader</a>, from which the convert program will read data instances.</li>
<li><code class="docutils literal"><span class="pre">num_shards</span></code>: the number of shards that the dataset will be partitioned into.</li>
<li><code class="docutils literal"><span class="pre">name_prefix</span></code>: the name prefix of generated files.</li>
</ul>
<p><code class="docutils literal"><span class="pre">reader</span></code>每次输出一个data instance,这个instance可以是单个值,或者用tuple表示的多个值:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="k">yield</span> <span class="mi">1</span> <span class="c1"># 单个值</span>
<span class="k">yield</span> <span class="n">numpy</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">uniform</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">28</span><span class="o">*</span><span class="mi">28</span><span class="p">)</span> <span class="c1"># 单个值</span>
<span class="k">yield</span> <span class="n">numpy</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">uniform</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">28</span><span class="o">*</span><span class="mi">28</span><span class="p">),</span> <span class="mi">0</span> <span class="c1"># 多个值</span>
</pre></div>
</div>
<p>每个值的类型可以是整形、浮点型数据、字符串,或者由它们组成的list,以及numpy.ndarray。如果是其它类型,会被Pickle序列化成字符串。</p>
</div>
</div>
<div class="section" id="">
276
<span id="id7"></span><h2>示例程序<a class="headerlink" href="#" title="永久链接至标题"></a></h2>
277
<div class="section" id="">
278
<span id="id8"></span><h3>使用转换库<a class="headerlink" href="#" title="永久链接至标题"></a></h3>
279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295
<p>以下<code class="docutils literal"><span class="pre">reader_creator</span></code>生成的<code class="docutils literal"><span class="pre">reader</span></code>每次输出一个data instance,每个data instance包涵两个值:numpy.ndarray类型的值和整型的值:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">reader_creator</span><span class="p">():</span>
    <span class="k">def</span> <span class="nf">reader</span><span class="p">():</span>
        <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1000</span><span class="p">):</span>
            <span class="k">yield</span> <span class="n">numpy</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">uniform</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">28</span><span class="o">*</span><span class="mi">28</span><span class="p">),</span> <span class="mi">0</span> <span class="c1"># 多个值</span>
    <span class="k">return</span> <span class="n">reader</span>
</pre></div>
</div>
<p><code class="docutils literal"><span class="pre">reader_creator</span></code>生成的<code class="docutils literal"><span class="pre">reader</span></code>传入<code class="docutils literal"><span class="pre">convert</span></code>函数即可完成转换:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">convert</span><span class="p">(</span><span class="s2">&quot;./&quot;</span><span class="p">,</span> <span class="n">reader_creator</span><span class="p">(),</span> <span class="mi">100</span><span class="p">,</span> <span class="n">random_images</span><span class="p">)</span>
</pre></div>
</div>
<p>以上命令会在当前目录下生成100个文件:</p>
<div class="highlight-text"><div class="highlight"><pre><span></span>random_images-00000-of-00099
random_images-00001-of-00099
...
random_images-00099-of-00099
296 297 298
</pre></div>
</div>
</div>
299
<div class="section" id="">
300 301
<span id="id9"></span><h3>进行训练<a class="headerlink" href="#" title="永久链接至标题"></a></h3>
<p>PaddlePaddle提供专用的<a class="reference external" href="https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/reader/README.md#python-data-reader-design-doc">data reader creator</a>,生成给定<code class="docutils literal"><span class="pre">RecordIO</span></code>文件对应的data reader。<strong>无论在本地还是在云端,reader的使用方式都是一致的</strong></p>
302
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="c1"># ...</span>
303
<span class="n">reader</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">reader</span><span class="o">.</span><span class="n">creator</span><span class="o">.</span><span class="n">RecordIO</span><span class="p">(</span><span class="s2">&quot;/pfs/datacenter_name/home/user_name/random_images-*-of-*&quot;</span><span class="p">)</span>
304 305 306 307
<span class="n">batch_reader</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">batch</span><span class="p">(</span><span class="n">paddle</span><span class="o">.</span><span class="n">dataset</span><span class="o">.</span><span class="n">mnist</span><span class="o">.</span><span class="n">train</span><span class="p">(),</span> <span class="mi">128</span><span class="p">)</span>
<span class="n">trainer</span><span class="o">.</span><span class="n">train</span><span class="p">(</span><span class="n">batch_reader</span><span class="p">,</span> <span class="o">...</span><span class="p">)</span>
</pre></div>
</div>
308
<p>以上代码的reader输出的data instance与生成数据集时,reader输出的data instance是一模一样的。</p>
309 310
</div>
</div>
311
<div class="section" id="">
312
<span id="id10"></span><h2>上传训练文件<a class="headerlink" href="#" title="永久链接至标题"></a></h2>
313
<p>使用下面命令,可以把本地的数据上传到存储集群中。</p>
314
<div class="highlight-bash"><div class="highlight"><pre><span></span>paddle pfs cp filename /pfs/<span class="nv">$DATACENTER</span>/home/<span class="nv">$USER</span>/folder/
315 316 317
</pre></div>
</div>
<p>比如,把之前示例中转换完毕的random_images数据集上传到云端的<code class="docutils literal"><span class="pre">/home/</span></code>可以用以下指令:</p>
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333
<div class="highlight-bash"><div class="highlight"><pre><span></span>paddle pfs cp random_images-*-of-* /pfs/<span class="nv">$DATACENTER</span>/home/<span class="nv">$USER</span>/folder/
</pre></div>
</div>
<p>需要<code class="docutils literal"><span class="pre">$DATACENTER</span></code>的配置写到配置文件中,例如</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="c1"># config file</span>
<span class="p">[</span><span class="n">datacenter_1</span><span class="p">]</span>
<span class="n">username</span><span class="o">=</span><span class="n">user</span>
<span class="n">usercert</span><span class="o">=</span><span class="n">user</span><span class="o">.</span><span class="n">pem</span>
<span class="n">userkey</span><span class="o">=</span><span class="n">user</span><span class="o">-</span><span class="n">key</span><span class="o">.</span><span class="n">pem</span>
<span class="n">endpoint</span><span class="o">=</span><span class="n">datacenter1</span><span class="o">.</span><span class="n">paddlepaddle</span><span class="o">.</span><span class="n">org</span>

<span class="p">[</span><span class="n">datacenter_2</span><span class="p">]</span>
<span class="n">username</span><span class="o">=</span><span class="n">user</span>
<span class="n">usercert</span><span class="o">=</span><span class="n">user</span><span class="o">.</span><span class="n">pem</span>
<span class="n">userkey</span><span class="o">=</span><span class="n">user</span><span class="o">-</span><span class="n">key</span><span class="o">.</span><span class="n">pem</span>
<span class="n">endpoint</span><span class="o">=</span><span class="n">datacenter2</span><span class="o">.</span><span class="n">paddlepaddle</span><span class="o">.</span><span class="n">org</span>
334 335 336 337 338 339
</pre></div>
</div>
</div>
</div>
<div class="section" id="todo">
<span id="todo"></span><h1>TODO<a class="headerlink" href="#todo" title="永久链接至标题"></a></h1>
340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
<div class="section" id="">
<span id="id11"></span><h2>文件访问的权限<a class="headerlink" href="#" title="永久链接至标题"></a></h2>
<p>控制用户权限</p>
<ul class="simple">
<li>用户可以把自己的数据分享给别人</li>
</ul>
</div>
<div class="section" id="">
<span id="id12"></span><h2>文件访问方式<a class="headerlink" href="#" title="永久链接至标题"></a></h2>
<p>不用mount的方式来访问数据,而是直接用API的接口远程访问</p>
<p>例如:</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">f</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="s1">&#39;/pfs/datacenter_name/home/user_name/test1.dat&#39;</span><span class="p">)</span>
</pre></div>
</div>
</div>
355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394
<div class="section" id="job">
<span id="job"></span><h2>支持用户自定义的数据预处理job<a class="headerlink" href="#job" title="永久链接至标题"></a></h2>
</div>
</div>


           </div>
          </div>
          <footer>
  

  <hr/>

  <div role="contentinfo">
    <p>
        &copy; Copyright 2016, PaddlePaddle developers.

    </p>
  </div>
  Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. 

</footer>

        </div>
      </div>

    </section>

  </div>
  


  

    <script type="text/javascript">
        var DOCUMENTATION_OPTIONS = {
            URL_ROOT:'../../',
            VERSION:'',
            COLLAPSE_INDEX:false,
            FILE_SUFFIX:'.html',
395 396
            HAS_SOURCE:  true,
            SOURCELINK_SUFFIX: ".txt",
397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417
        };
    </script>
      <script type="text/javascript" src="../../_static/jquery.js"></script>
      <script type="text/javascript" src="../../_static/underscore.js"></script>
      <script type="text/javascript" src="../../_static/doctools.js"></script>
      <script type="text/javascript" src="../../_static/translations.js"></script>
      <script type="text/javascript" src="https://cdn.bootcss.com/mathjax/2.7.0/MathJax.js"></script>
       
  

  
  
    <script type="text/javascript" src="../../_static/js/theme.js"></script>
  
  
  <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa" crossorigin="anonymous"></script>
  <script src="https://cdn.jsdelivr.net/perfect-scrollbar/0.6.14/js/perfect-scrollbar.jquery.min.js"></script>
  <script src="../../_static/js/paddle_doc_init.js"></script> 

</body>
</html>