data_dispatch.html 25.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64


<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  <title>训练数据的存储和分发 &mdash; PaddlePaddle  documentation</title>
  

  
  

  

  
  
    

  

  
  
    <link rel="stylesheet" href="../../_static/css/theme.css" type="text/css" />
  

  
  
        <link rel="index" title="Index"
              href="../../genindex.html"/>
        <link rel="search" title="Search" href="../../search.html"/>
    <link rel="top" title="PaddlePaddle  documentation" href="../../index.html"/> 

  <link rel="stylesheet" href="https://cdn.jsdelivr.net/perfect-scrollbar/0.6.14/css/perfect-scrollbar.min.css" type="text/css" />
  <link rel="stylesheet" href="../../_static/css/override.css" type="text/css" />
  <script>
  var _hmt = _hmt || [];
  (function() {
    var hm = document.createElement("script");
    hm.src = "//hm.baidu.com/hm.js?b9a314ab40d04d805655aab1deee08ba";
    var s = document.getElementsByTagName("script")[0]; 
    s.parentNode.insertBefore(hm, s);
  })();
  </script>

  

  
  <script src="../../_static/js/modernizr.min.js"></script>

</head>

<body class="wy-body-for-nav" role="document">

  
  <header class="site-header">
    <div class="site-logo">
      <a href="/"><img src="../../_static/images/PP_w.png"></a>
    </div>
    <div class="site-nav-links">
      <div class="site-menu">
65
        <a class="fork-on-github" href="https://github.com/PaddlePaddle/Paddle" target="_blank"><i class="fa fa-github"></i>Fork me on Github</a>
66 67 68 69 70 71 72 73 74 75 76 77
        <div class="language-switcher dropdown">
          <a type="button" data-toggle="dropdown">
            <span>English</span>
            <i class="fa fa-angle-up"></i>
            <i class="fa fa-angle-down"></i>
          </a>
          <ul class="dropdown-menu">
            <li><a href="/doc_cn">中文</a></li>
            <li><a href="/doc">English</a></li>
          </ul>
        </div>
        <ul class="site-page-links">
78
          <li><a href="/">Home</a></li>
79 80 81 82 83 84 85 86
        </ul>
      </div>
      <div class="doc-module">
        
        <ul>
<li class="toctree-l1"><a class="reference internal" href="../../getstarted/index_en.html">GET STARTED</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../howto/index_en.html">HOW TO</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../api/index_en.html">API</a></li>
87
<li class="toctree-l1"><a class="reference internal" href="../../mobile/index_en.html">MOBILE</a></li>
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110
</ul>

        
<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../../search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>        
      </div>
    </div>
  </header>
  
  <div class="main-content-wrap">

    
    <nav class="doc-menu-vertical" role="navigation">
        
          
          <ul>
<li class="toctree-l1"><a class="reference internal" href="../../getstarted/index_en.html">GET STARTED</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../getstarted/build_and_install/index_en.html">Install and Build</a><ul>
111 112
<li class="toctree-l3"><a class="reference internal" href="../../getstarted/build_and_install/pip_install_en.html">Install Using pip</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../getstarted/build_and_install/docker_install_en.html">Run in Docker Containers</a></li>
113
<li class="toctree-l3"><a class="reference internal" href="../../howto/dev/build_en.html">Build using Docker</a></li>
114
<li class="toctree-l3"><a class="reference internal" href="../../getstarted/build_and_install/build_from_source_en.html">Build from Sources</a></li>
115 116 117 118 119 120 121 122 123 124 125
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../howto/index_en.html">HOW TO</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../howto/usage/cmd_parameter/index_en.html">Set Command-line Parameters</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../howto/usage/cmd_parameter/use_case_en.html">Use Case</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../howto/usage/cmd_parameter/arguments_en.html">Argument Outline</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../howto/usage/cmd_parameter/detail_introduction_en.html">Detail Description</a></li>
</ul>
</li>
126
<li class="toctree-l2"><a class="reference internal" href="../../howto/usage/cluster/cluster_train_en.html">PaddlePaddle Distributed Training</a></li>
127
<li class="toctree-l2"><a class="reference internal" href="../../howto/dev/new_layer_en.html">Write New Layers</a></li>
128
<li class="toctree-l2"><a class="reference internal" href="../../howto/dev/contribute_to_paddle_en.html">Contribute Code</a></li>
129
<li class="toctree-l2"><a class="reference internal" href="../../howto/dev/write_docs_en.html">Contribute Documentation</a></li>
130 131 132 133
<li class="toctree-l2"><a class="reference internal" href="../../howto/deep_model/rnn/index_en.html">RNN Models</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../howto/deep_model/rnn/rnn_config_en.html">RNN Configuration</a></li>
</ul>
</li>
134 135 136 137 138 139 140
<li class="toctree-l2"><a class="reference internal" href="../../howto/optimization/gpu_profiling_en.html">Tune GPU Performance</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../api/index_en.html">API</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../api/v2/model_configs.html">Model Configuration</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/activation.html">Activation</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/layer.html">Layers</a></li>
141
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/evaluators.html">Evaluators</a></li>
142 143 144 145 146 147
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/optimizer.html">Optimizer</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/pooling.html">Pooling</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/networks.html">Networks</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/config/attr.html">Parameter Attribute</a></li>
</ul>
</li>
148 149 150 151 152 153
<li class="toctree-l2"><a class="reference internal" href="../../api/v2/data.html">Data Reader Interface and DataSets</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/data/data_reader.html">Data Reader Interface</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/data/image.html">Image Interface</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/data/dataset.html">Dataset</a></li>
</ul>
</li>
154
<li class="toctree-l2"><a class="reference internal" href="../../api/v2/run_logic.html">Training and Inference</a></li>
155 156 157 158 159 160 161 162 163 164 165 166 167
<li class="toctree-l2"><a class="reference internal" href="../../api/v2/fluid.html">Fluid</a><ul>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/fluid/layers.html">Layers</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/fluid/data_feeder.html">DataFeeder</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/fluid/executor.html">Executor</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/fluid/initializer.html">Initializer</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/fluid/evaluator.html">Evaluator</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/fluid/nets.html">Nets</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/fluid/optimizer.html">Optimizer</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/fluid/param_attr.html">ParamAttr</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/fluid/profiler.html">Profiler</a></li>
<li class="toctree-l3"><a class="reference internal" href="../../api/v2/fluid/regularizer.html">Regularizer</a></li>
</ul>
</li>
168 169
</ul>
</li>
170 171
<li class="toctree-l1"><a class="reference internal" href="../../mobile/index_en.html">MOBILE</a><ul>
<li class="toctree-l2"><a class="reference internal" href="../../mobile/cross_compiling_for_android_en.html">Build PaddlePaddle for Android</a></li>
172
<li class="toctree-l2"><a class="reference internal" href="../../mobile/cross_compiling_for_ios_en.html">PaddlePaddle Compiling Guide for iOS</a></li>
173 174 175
<li class="toctree-l2"><a class="reference internal" href="../../mobile/cross_compiling_for_raspberry_en.html">Build PaddlePaddle for Raspberry Pi</a></li>
</ul>
</li>
176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
</ul>

        
    </nav>
    
    <section class="doc-content-wrap">

      

 







<div role="navigation" aria-label="breadcrumbs navigation">
  <ul class="wy-breadcrumbs">
      
    <li>训练数据的存储和分发</li>
  </ul>
</div>
      
      <div class="wy-nav-content" id="doc-content">
        <div class="rst-content">
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
            
  <div class="section" id="">
<span id="id1"></span><h1>训练数据的存储和分发<a class="headerlink" href="#" title="Permalink to this headline"></a></h1>
<div class="section" id="">
208 209 210 211
<span id="id2"></span><h2>概念解释<a class="headerlink" href="#" title="Permalink to this headline"></a></h2>
</div>
<div class="section" id="">
<span id="id3"></span><h2>流程介绍<a class="headerlink" href="#" title="Permalink to this headline"></a></h2>
212 213 214 215 216 217
<p>生产环境中的训练数据集通常体积很大,并被存储在诸如Hadoop HDFS,Ceph,AWS S3之类的分布式存储之上。这些分布式存储服务通常会把数据切割成多个分片分布式的存储在多个节点之上。这样就可以在云端执行多种数据类计算任务,包括:</p>
<ul class="simple">
<li>数据预处理任务</li>
<li>Paddle训练任务</li>
<li>在线模型预测服务</li>
</ul>
218 219 220
<div style="align: center">
<img src="src/paddle-cloud-in-data-center.png" width="800"/>
</div><p>在上图中显示了在一个实际生产环境中的应用(人脸识别)的数据流图。生产环境的日志数据会通过实时流的方式(Kafka)和离线数据的方式(HDFS)存储,并在集群中运行多个分布式数据处理任务,比如流式数据处理(online data process),离线批处理(offline data process)完成数据的预处理,提供给paddle作为训练数据。用户也可以上传labeled data到分布式存储补充训练数据。在paddle之上运行的深度学习训练输出的模型会提供给在线人脸识别的应用使用。</p>
221 222
</div>
<div class="section" id="">
223 224 225
<span id="id4"></span><h2>训练数据存储<a class="headerlink" href="#" title="Permalink to this headline"></a></h2>
<p>我们选择<a class="reference external" href="http://docs.ceph.com/docs/master/cephfs/">CephFS</a>作为存储系统。</p>
<ul class="simple">
226
<li>无论是从<a class="reference internal" href="../file_manager/README.html"><span class="doc">PFSClient</span></a>的角度,还是从<a class="reference external" href="https://kubernetes.io/docs/concepts/workloads/pods/pod/">Pod</a>中运行任务的角度,统一用<code class="docutils literal"><span class="pre">/pfs/$DATACENTER/home/$USER</span></code>来访问用户自己的数据。</li>
227 228 229 230 231 232 233 234
<li><code class="docutils literal"><span class="pre">/pfs/$DATACENTER/common</span></code>下存放公共数据集合<ul>
<li>做只读挂载</li>
</ul>
</li>
</ul>
<div style="align: center">
<img src="src/file_storage.png" width="700" align=center/>
</div></div>
235
<div class="section" id="">
236 237
<span id="id5"></span><h2>文件预处理<a class="headerlink" href="#" title="Permalink to this headline"></a></h2>
<p>在开始训练之前, 数据集需要预先被转换成PaddlePaddle分布式训练使用的存储格<a class="reference external" href="https://github.com/PaddlePaddle/Paddle/issues/1947">RecordIO</a>。我们提供两个转换方式:</p>
238 239 240 241
<ol class="simple">
<li>用户在本地转换好再上传</li>
<li>用户上传数据后,在机群上运行转换程序</li>
</ol>
242 243
<p>转换生成的文件名会是以下格式:</p>
<div class="highlight-text"><div class="highlight"><pre><span></span>name_prefix-aaaaa-of-bbbbb
244 245
</pre></div>
</div>
246 247 248 249 250 251
<p>&#8220;aaaaa&#8221;&#8221;bbbbb&#8221;都是五位的数字,每一个文件是数据集的一个shard,&#8221;aaaaa&#8221;代表shard的index,&#8221;bbbbb&#8221;代表这个shard的最大index。</p>
<p>比如ImageNet这个数据集可能被分成1000个shard,它们的文件名是:</p>
<div class="highlight-text"><div class="highlight"><pre><span></span>imagenet-00000-of-00999
imagenet-00001-of-00999
...
imagenet-00999-of-00999
252 253
</pre></div>
</div>
254
<div class="section" id="">
255
<span id="id6"></span><h3>转换库<a class="headerlink" href="#" title="Permalink to this headline"></a></h3>
256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
<p>无论是在本地或是云端转换,我们都提供Python的转换库,接口是:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">convert</span><span class="p">(</span><span class="n">output_path</span><span class="p">,</span> <span class="n">reader</span><span class="p">,</span> <span class="n">num_shards</span><span class="p">,</span> <span class="n">name_prefix</span><span class="p">)</span>
</pre></div>
</div>
<ul class="simple">
<li><code class="docutils literal"><span class="pre">output_path</span></code>: directory in which output files will be saved.</li>
<li><code class="docutils literal"><span class="pre">reader</span></code>: a <a class="reference external" href="https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/reader/README.md#data-reader-interface">data reader</a>, from which the convert program will read data instances.</li>
<li><code class="docutils literal"><span class="pre">num_shards</span></code>: the number of shards that the dataset will be partitioned into.</li>
<li><code class="docutils literal"><span class="pre">name_prefix</span></code>: the name prefix of generated files.</li>
</ul>
<p><code class="docutils literal"><span class="pre">reader</span></code>每次输出一个data instance,这个instance可以是单个值,或者用tuple表示的多个值:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="k">yield</span> <span class="mi">1</span> <span class="c1"># 单个值</span>
<span class="k">yield</span> <span class="n">numpy</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">uniform</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">28</span><span class="o">*</span><span class="mi">28</span><span class="p">)</span> <span class="c1"># 单个值</span>
<span class="k">yield</span> <span class="n">numpy</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">uniform</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">28</span><span class="o">*</span><span class="mi">28</span><span class="p">),</span> <span class="mi">0</span> <span class="c1"># 多个值</span>
</pre></div>
</div>
<p>每个值的类型可以是整形、浮点型数据、字符串,或者由它们组成的list,以及numpy.ndarray。如果是其它类型,会被Pickle序列化成字符串。</p>
</div>
</div>
<div class="section" id="">
276
<span id="id7"></span><h2>示例程序<a class="headerlink" href="#" title="Permalink to this headline"></a></h2>
277
<div class="section" id="">
278
<span id="id8"></span><h3>使用转换库<a class="headerlink" href="#" title="Permalink to this headline"></a></h3>
279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295
<p>以下<code class="docutils literal"><span class="pre">reader_creator</span></code>生成的<code class="docutils literal"><span class="pre">reader</span></code>每次输出一个data instance,每个data instance包涵两个值:numpy.ndarray类型的值和整型的值:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="k">def</span> <span class="nf">reader_creator</span><span class="p">():</span>
    <span class="k">def</span> <span class="nf">reader</span><span class="p">():</span>
        <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">1000</span><span class="p">):</span>
            <span class="k">yield</span> <span class="n">numpy</span><span class="o">.</span><span class="n">random</span><span class="o">.</span><span class="n">uniform</span><span class="p">(</span><span class="o">-</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="n">size</span><span class="o">=</span><span class="mi">28</span><span class="o">*</span><span class="mi">28</span><span class="p">),</span> <span class="mi">0</span> <span class="c1"># 多个值</span>
    <span class="k">return</span> <span class="n">reader</span>
</pre></div>
</div>
<p><code class="docutils literal"><span class="pre">reader_creator</span></code>生成的<code class="docutils literal"><span class="pre">reader</span></code>传入<code class="docutils literal"><span class="pre">convert</span></code>函数即可完成转换:</p>
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="n">convert</span><span class="p">(</span><span class="s2">&quot;./&quot;</span><span class="p">,</span> <span class="n">reader_creator</span><span class="p">(),</span> <span class="mi">100</span><span class="p">,</span> <span class="n">random_images</span><span class="p">)</span>
</pre></div>
</div>
<p>以上命令会在当前目录下生成100个文件:</p>
<div class="highlight-text"><div class="highlight"><pre><span></span>random_images-00000-of-00099
random_images-00001-of-00099
...
random_images-00099-of-00099
296 297 298
</pre></div>
</div>
</div>
299
<div class="section" id="">
300 301
<span id="id9"></span><h3>进行训练<a class="headerlink" href="#" title="Permalink to this headline"></a></h3>
<p>PaddlePaddle提供专用的<a class="reference external" href="https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/reader/README.md#python-data-reader-design-doc">data reader creator</a>,生成给定<code class="docutils literal"><span class="pre">RecordIO</span></code>文件对应的data reader。<strong>无论在本地还是在云端,reader的使用方式都是一致的</strong></p>
302
<div class="highlight-python"><div class="highlight"><pre><span></span><span class="c1"># ...</span>
303
<span class="n">reader</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">reader</span><span class="o">.</span><span class="n">creator</span><span class="o">.</span><span class="n">RecordIO</span><span class="p">(</span><span class="s2">&quot;/pfs/datacenter_name/home/user_name/random_images-*-of-*&quot;</span><span class="p">)</span>
304 305 306 307
<span class="n">batch_reader</span> <span class="o">=</span> <span class="n">paddle</span><span class="o">.</span><span class="n">batch</span><span class="p">(</span><span class="n">paddle</span><span class="o">.</span><span class="n">dataset</span><span class="o">.</span><span class="n">mnist</span><span class="o">.</span><span class="n">train</span><span class="p">(),</span> <span class="mi">128</span><span class="p">)</span>
<span class="n">trainer</span><span class="o">.</span><span class="n">train</span><span class="p">(</span><span class="n">batch_reader</span><span class="p">,</span> <span class="o">...</span><span class="p">)</span>
</pre></div>
</div>
308
<p>以上代码的reader输出的data instance与生成数据集时,reader输出的data instance是一模一样的。</p>
309 310
</div>
</div>
311
<div class="section" id="">
312
<span id="id10"></span><h2>上传训练文件<a class="headerlink" href="#" title="Permalink to this headline"></a></h2>
313
<p>使用下面命令,可以把本地的数据上传到存储集群中。</p>
314
<div class="highlight-bash"><div class="highlight"><pre><span></span>paddle pfs cp filename /pfs/<span class="nv">$DATACENTER</span>/home/<span class="nv">$USER</span>/folder/
315 316 317
</pre></div>
</div>
<p>比如,把之前示例中转换完毕的random_images数据集上传到云端的<code class="docutils literal"><span class="pre">/home/</span></code>可以用以下指令:</p>
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333
<div class="highlight-bash"><div class="highlight"><pre><span></span>paddle pfs cp random_images-*-of-* /pfs/<span class="nv">$DATACENTER</span>/home/<span class="nv">$USER</span>/folder/
</pre></div>
</div>
<p>需要<code class="docutils literal"><span class="pre">$DATACENTER</span></code>的配置写到配置文件中,例如</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="c1"># config file</span>
<span class="p">[</span><span class="n">datacenter_1</span><span class="p">]</span>
<span class="n">username</span><span class="o">=</span><span class="n">user</span>
<span class="n">usercert</span><span class="o">=</span><span class="n">user</span><span class="o">.</span><span class="n">pem</span>
<span class="n">userkey</span><span class="o">=</span><span class="n">user</span><span class="o">-</span><span class="n">key</span><span class="o">.</span><span class="n">pem</span>
<span class="n">endpoint</span><span class="o">=</span><span class="n">datacenter1</span><span class="o">.</span><span class="n">paddlepaddle</span><span class="o">.</span><span class="n">org</span>

<span class="p">[</span><span class="n">datacenter_2</span><span class="p">]</span>
<span class="n">username</span><span class="o">=</span><span class="n">user</span>
<span class="n">usercert</span><span class="o">=</span><span class="n">user</span><span class="o">.</span><span class="n">pem</span>
<span class="n">userkey</span><span class="o">=</span><span class="n">user</span><span class="o">-</span><span class="n">key</span><span class="o">.</span><span class="n">pem</span>
<span class="n">endpoint</span><span class="o">=</span><span class="n">datacenter2</span><span class="o">.</span><span class="n">paddlepaddle</span><span class="o">.</span><span class="n">org</span>
334 335 336 337 338 339
</pre></div>
</div>
</div>
</div>
<div class="section" id="todo">
<span id="todo"></span><h1>TODO<a class="headerlink" href="#todo" title="Permalink to this headline"></a></h1>
340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
<div class="section" id="">
<span id="id11"></span><h2>文件访问的权限<a class="headerlink" href="#" title="Permalink to this headline"></a></h2>
<p>控制用户权限</p>
<ul class="simple">
<li>用户可以把自己的数据分享给别人</li>
</ul>
</div>
<div class="section" id="">
<span id="id12"></span><h2>文件访问方式<a class="headerlink" href="#" title="Permalink to this headline"></a></h2>
<p>不用mount的方式来访问数据,而是直接用API的接口远程访问</p>
<p>例如:</p>
<div class="highlight-default"><div class="highlight"><pre><span></span><span class="n">f</span> <span class="o">=</span> <span class="nb">open</span><span class="p">(</span><span class="s1">&#39;/pfs/datacenter_name/home/user_name/test1.dat&#39;</span><span class="p">)</span>
</pre></div>
</div>
</div>
355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394
<div class="section" id="job">
<span id="job"></span><h2>支持用户自定义的数据预处理job<a class="headerlink" href="#job" title="Permalink to this headline"></a></h2>
</div>
</div>


           </div>
          </div>
          <footer>
  

  <hr/>

  <div role="contentinfo">
    <p>
        &copy; Copyright 2016, PaddlePaddle developers.

    </p>
  </div>
  Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. 

</footer>

        </div>
      </div>

    </section>

  </div>
  


  

    <script type="text/javascript">
        var DOCUMENTATION_OPTIONS = {
            URL_ROOT:'../../',
            VERSION:'',
            COLLAPSE_INDEX:false,
            FILE_SUFFIX:'.html',
395 396
            HAS_SOURCE:  true,
            SOURCELINK_SUFFIX: ".txt",
397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416
        };
    </script>
      <script type="text/javascript" src="../../_static/jquery.js"></script>
      <script type="text/javascript" src="../../_static/underscore.js"></script>
      <script type="text/javascript" src="../../_static/doctools.js"></script>
      <script type="text/javascript" src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
       
  

  
  
    <script type="text/javascript" src="../../_static/js/theme.js"></script>
  
  
  <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa" crossorigin="anonymous"></script>
  <script src="https://cdn.jsdelivr.net/perfect-scrollbar/0.6.14/js/perfect-scrollbar.jquery.min.js"></script>
  <script src="../../_static/js/paddle_doc_init.js"></script> 

</body>
</html>