Commit 4f5b5e28 authored by Travis CI

Deploy to GitHub Pages: 2edeb639

Parent b47e3d9f
````diff
@@ -39,15 +39,16 @@ In the backward pass
 This implementation allows to write mixed device program like this
 
 ```python
-# get embedding feature on CPU
-feature = some_cpu_only_op(data)
+W1 = fluid.tensor(size=[100,20], parameter=true)
+W2 = fluid.tensor(size=[20,15], parameter=true)
+data = layers.data()
 
-gpu_places = get_place(use_gpu=True)
+gpu_places = layers.get_place(use_gpu=True)
 # parallel processing on multiple GPUs
 pd = ParallelDo(gpu_places)
-with pd.do():
-    read_input(feature)
-    prediction = my_net(feature)
+with pd.do(input=data):
+    prediction = softmax(fc(fc(data, W1), W2))
     write_output(prediction)
 prediction = pd()
 loss = cross_entropy(prediction, label)
````
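The revised example relies on `ParallelDo`'s core semantics: the input is split across the given places along the batch dimension, the body block runs once per shard, and the per-place outputs are gathered back into a single tensor. Below is a minimal numpy sketch of those semantics, assuming a 4-way split and the shapes from the example; `parallel_do`, `body`, and the batch size are illustrative stand-ins, not the Fluid API.

```python
import numpy as np

def parallel_do(body, data, num_places):
    # Conceptual model only: scatter the batch across places,
    # run the body once per shard, then gather the outputs.
    shards = np.array_split(data, num_places, axis=0)
    outputs = [body(shard) for shard in shards]
    return np.concatenate(outputs, axis=0)

# Toy stand-ins matching the shapes in the example above.
rng = np.random.default_rng(0)
data = rng.standard_normal((64, 100)).astype(np.float32)  # a batch of inputs
W1 = rng.standard_normal((100, 20)).astype(np.float32)
W2 = rng.standard_normal((20, 15)).astype(np.float32)

def softmax(x):
    e = np.exp(x - x.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

body = lambda x: softmax(x @ W1 @ W2)  # block1: fc, fc, softmax
prediction = parallel_do(body, data, num_places=4)
assert prediction.shape == (64, 15)
```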
````diff
@@ -66,20 +67,20 @@ start_program
 main_program
 {
 block0 {
-  vars: data, places, w1, w2
+  vars: data, places, w1, w2, w1_grad, w2_grad,
   ops: data, get_place, parallel_do(block1),
        parallel_do_grad(block2),
        sgd(w2, w2_grad),
        sgd(w1, w1_grad)
 }
-block1 {
+block1 { # the forward pass
   parent_block: 0
   vars: data, h1, h2, loss
   ops: fc, fc, softmax
 }
-block2 {
+block2 { # the backward pass
   parent_block: 1
-  vars: data_grad, h1_grad, h2_grad, loss_gard, w1_grad, w2_grad
+  vars: data_grad, h1_grad, h2_grad, loss_gard, local_w1_grad, local_w2_grad
   ops: softmax_grad,
        fc_grad
        fc_grad
````
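The variable renaming in `block2` is the substance of this hunk: each place now computes `local_w1_grad`/`local_w2_grad` inside the backward block, and `parallel_do_grad` reduces them across places into the single `w1_grad`/`w2_grad` in `block0` that the `sgd` ops consume. A hedged numpy sketch of that reduction follows; all names are illustrative, and the actual cross-device mechanism (for example an allreduce) is not specified by this diff.

```python
import numpy as np

def parallel_do_grad(local_grads):
    # Sum the per-place local gradients into one global gradient,
    # as an allreduce would; illustrative only.
    return sum(local_grads)

rng = np.random.default_rng(0)
# Each of 4 places produced its own local_w1_grad inside block2 ...
local_w1_grads = [rng.standard_normal((100, 20)) for _ in range(4)]
# ... which parallel_do_grad reduces into block0's single w1_grad ...
w1_grad = parallel_do_grad(local_w1_grads)
# ... that sgd(w1, w1_grad) then applies exactly once.
w1 = rng.standard_normal((100, 20))
w1 -= 0.01 * w1_grad
```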