Merge pull request #2 from PaddlePaddle/develop

rebase

Merge pull request #2 from PaddlePaddle/develop
rebase
5777fae7 · hohdiy · GitHub · 021b3a44 · f93af824 · 5777fae7
24 changed file
--- a/.travis.yml
+++ b/.travis.yml
@@ -50,7 +50,7 @@ before_install:
    fi
  - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi
  - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
-  - pip install wheel protobuf sphinx breathe recommonmark virtualenv numpy
+  - pip install wheel protobuf sphinx breathe recommonmark virtualenv numpy sphinx_rtd_theme
 script:
  - paddle/scripts/travis/main.sh
 notifications:

--- a/demo/image_classification/predict.sh
+++ b/demo/image_classification/predict.sh
--- a/demo/semantic_role_labeling/predict.sh
+++ b/demo/semantic_role_labeling/predict.sh
--- a/demo/semantic_role_labeling/test.sh
+++ b/demo/semantic_role_labeling/test.sh
--- a/demo/semantic_role_labeling/train.sh
+++ b/demo/semantic_role_labeling/train.sh
--- a/demo/seqToseq/dataprovider.py
+++ b/demo/seqToseq/dataprovider.py
@@ -19,27 +19,44 @@ START = "<s>"
 END = "<e>"
-def hook(settings, src_dict, trg_dict, file_list, **kwargs):
+def hook(settings, src_dict_path, trg_dict_path, is_generating, file_list,
+         **kwargs):
    # job_mode = 1: training mode
    # job_mode = 0: generating mode
-    settings.job_mode = trg_dict is not None
+    settings.job_mode = not is_generating
-    settings.src_dict = src_dict
+    settings.src_dict = dict()
+    with open(src_dict_path, "r") as fin:
+        settings.src_dict = {
+            line.strip(): line_count
+            for line_count, line in enumerate(fin)
+        }
+    settings.trg_dict = dict()
+    with open(trg_dict_path, "r") as fin:
+        settings.trg_dict = {
+            line.strip(): line_count
+            for line_count, line in enumerate(fin)
+        }
    settings.logger.info("src dict len : %d" % (len(settings.src_dict)))
    settings.sample_count = 0
    if settings.job_mode:
-        settings.trg_dict = trg_dict
+        settings.slots = {
-        settings.slots = [
+            'source_language_word':
            integer_value_sequence(len(settings.src_dict)),
+            'target_language_word':
            integer_value_sequence(len(settings.trg_dict)),
+            'target_language_next_word':
            integer_value_sequence(len(settings.trg_dict))
-        ]
+        }
        settings.logger.info("trg dict len : %d" % (len(settings.trg_dict)))
    else:
-        settings.slots = [
+        settings.slots = {
+            'source_language_word':
            integer_value_sequence(len(settings.src_dict)),
+            'sent_id':
            integer_value_sequence(len(open(file_list[0], "r").readlines()))
-        ]
+        }
 def _get_ids(s, dictionary):
@@ -69,6 +86,10 @@ def process(settings, file_name):
                    continue
                trg_ids_next = trg_ids + [settings.trg_dict[END]]
                trg_ids = [settings.trg_dict[START]] + trg_ids
-                yield src_ids, trg_ids, trg_ids_next
+                yield {
+                    'source_language_word': src_ids,
+                    'target_language_word': trg_ids,
+                    'target_language_next_word': trg_ids_next
+                }
            else:
-                yield src_ids, [line_count]
+                yield {'source_language_word': src_ids, 'sent_id': [line_count]}
--- a/demo/seqToseq/seqToseq_net.py
+++ b/demo/seqToseq/seqToseq_net.py
@@ -37,17 +37,10 @@ def seq_to_seq_data(data_dir,
    """
    src_lang_dict = os.path.join(data_dir, 'src.dict')
    trg_lang_dict = os.path.join(data_dir, 'trg.dict')
-    src_dict = dict()
-    for line_count, line in enumerate(open(src_lang_dict, "r")):
-        src_dict[line.strip()] = line_count
-    trg_dict = dict()
-    for line_count, line in enumerate(open(trg_lang_dict, "r")):
-        trg_dict[line.strip()] = line_count
    if is_generating:
        train_list = None
        test_list = os.path.join(data_dir, gen_list)
-        trg_dict = None
    else:
        train_list = os.path.join(data_dir, train_list)
        test_list = os.path.join(data_dir, test_list)
@@ -57,8 +50,11 @@ def seq_to_seq_data(data_dir,
        test_list,
        module="dataprovider",
        obj="process",
-        args={"src_dict": src_dict,
+        args={
-              "trg_dict": trg_dict})
+            "src_dict_path": src_lang_dict,
+            "trg_dict_path": trg_lang_dict,
+            "is_generating": is_generating
+        })
    return {
        "src_dict_path": src_lang_dict,

--- a/doc/conf.py.in
+++ b/doc/conf.py.in
@@ -23,7 +23,7 @@ AutoStructify = transform.AutoStructify
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 sys.path.insert(0, '@PROJ_ROOT@/python')
-templates_path = ["@PROJ_ROOT@/doc/templates"]
+templates_path = ["@PROJ_ROOT@/doc_theme/templates"]
 # -- General configuration ------------------------------------------------
@@ -113,13 +113,12 @@ todo_include_todos = False
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
-#html_theme = 'sphinx_rtd_theme'
+html_theme = 'sphinx_rtd_theme'
-html_theme = 'classic'
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ['@PROJ_ROOT@/doc_theme/static']
 # Output file base name for HTML help builder.
 htmlhelp_basename = project + 'doc'

--- a/doc/howto/cmd_parameter/arguments.md
+++ b/doc/howto/cmd_parameter/arguments.md
@@ -143,7 +143,7 @@ It looks like there are a lot of arguments. However, most of them are for develo
 </tr>
 <tr>
-<td class="left" rowspan = "2">testing during training</td><td class="left">test_all_data_in_one_period</td>
+<td class="left" rowspan = "2">testing during training</td><td class="left">test_period</td>
 <td class="left">√</td><td class="left">√</td><td class="left"></td><td class="left"></td>
 </tr>

--- a/doc/howto/cmd_parameter/detail_introduction.md
+++ b/doc/howto/cmd_parameter/detail_introduction.md
@@ -31,7 +31,7 @@
  - type: string (default: null).
 * `--version`
-  - Whether to print version infomatrion.
+  - Whether to print version information.
  - type: bool (default: 0).
 * `--show_layer_stat`
@@ -110,8 +110,8 @@
  - type: int32 (default: -1).
 * `--test_period`
-  - Run testing every test_period train batches. If not set, run testing each pass.
+  - If 0, run the test on all test data at the end of each pass. If non-zero, run the test on all test data every test_period batches.
-  - type: int32 (default: 1000).
+  - type: int32 (default: 0).
 * `--test_wait`
  - Whether to wait for parameter per pass if not exist. If set test_data_path in submitting environment of cluster, it will launch one process to perform testing, so we need to set test_wait=1. Note that in the cluster submitting environment, this argument has been set True by default.
@@ -121,10 +121,6 @@
  - File that saves the model list when testing. It was set automatically when using cluster submitting environment after setting model_path.
  - type: string (default: "", null).
-* `--test_all_data_in_one_period`
-  - This argument is usually used in testing period during traning. If true, all data will be tested in one test period. Otherwise (batch_size * log_peroid) data will be tested.
-  - type: bool (default: 0).
 * `--predict_output_dir`
  - Directory that saves the layer output. It is configured in Outputs() in network config. Default, this argument is null, meaning save nothing. Specify this directory if you want to save feature map of some layers in testing mode. Note that, layer outputs are values after activation function.
  - type: string (default: "", null).

--- a/doc/howto/cmd_parameter/use_case.md
+++ b/doc/howto/cmd_parameter/use_case.md
@@ -10,9 +10,8 @@ paddle train \
  --config=network_config \
  --save_dir=output \
  --trainer_count=COUNT \                #(default:1)
-  --test_period=M \                      #(default:1000）
+  --test_period=M \                      #(default:0) 
-  --test_all_data_in_one_period=true \   #(default:false) 
+  --num_passes=N \                       #(default:100)
-  --num_passes=N \                       #(defalut:100）
  --log_period=K \                       #(default:100)
  --dot_period=1000 \                    #(default:1)
  #[--show_parameter_stats_period=100] \ #(default:0)

--- a/doc_cn/conf.py.in
+++ b/doc_cn/conf.py.in
@@ -22,7 +22,7 @@ AutoStructify = transform.AutoStructify
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 sys.path.insert(0, '@PROJ_ROOT@/python')
-templates_path = ["@PROJ_ROOT@/doc/templates"]
+templates_path = ["@PROJ_ROOT@/doc_theme/templates"]
 # -- General configuration ------------------------------------------------
@@ -112,12 +112,12 @@ todo_include_todos = False
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
-#html_theme = 'sphinx_rtd_theme'  # sphinx_rtd_theme will cause table bad style
+html_theme = 'sphinx_rtd_theme'
-html_theme = 'classic'
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ['@PROJ_ROOT@/doc_theme/static']
 # Output file base name for HTML help builder.
 htmlhelp_basename = project + 'doc'

--- a/doc_cn/faq/index.rst
+++ b/doc_cn/faq/index.rst
@@ -214,3 +214,41 @@ PaddlePaddle的参数使用名字 :code:`name` 作为参数的ID，相同名字
        cmake .. -DPYTHON_EXECUTABLE=<exc_path> -DPYTHON_LIBRARY=<lib_path>  -DPYTHON_INCLUDE_DIR=<inc_path>
 用户需要指定本机上Python的路径：``<exc_path>``, ``<lib_path>``, ``<inc_path>``
+10. A protocol message was rejected because it was too big
+----------------------------------------------------------
+如果在训练NLP相关模型时，出现以下错误：
+..  code-block:: bash
+    [libprotobuf ERROR google/protobuf/io/coded_stream.cc:171] A protocol message was rejected because it was too big (more than 67108864 bytes).  To increase the limit (or to disable these warnings), see CodedInputStream::SetTotalBytesLimit() in google/protobuf/io/coded_stream.h.
+    F1205 14:59:50.295174 14703 TrainerConfigHelper.cpp:59] Check failed: m->conf.ParseFromString(configProtoStr) 
+可能的原因是：传给dataprovider的某一个args过大，一般是由于直接传递大字典导致的。错误的define_py_data_sources2类似：
+..  code-block:: python
+     src_dict = dict()
+     for line_count, line in enumerate(open(src_dict_path, "r")):
+        src_dict[line.strip()] = line_count
+     define_py_data_sources2(
+        train_list,
+        test_list,
+        module="dataprovider",
+        obj="process",
+        args={"src_dict": src_dict})
+解决方案是：将字典的地址作为args传给dataprovider，然后在dataprovider里面根据该地址加载字典。即define_py_data_sources2应改为：
+..  code-block:: python
+     define_py_data_sources2(
+        train_list,
+        test_list,
+        module="dataprovider",
+        obj="process",
+        args={"src_dict_path": src_dict_path})
+完整源码可参考 `seqToseq <https://github.com/PaddlePaddle/Paddle/tree/develop/demo/seqToseq>`_ 示例。
\ No newline at end of file
--- a/doc_theme/static/css/override.css
+++ b/doc_theme/static/css/override.css
--- a/doc_theme/static/images/PP_w.png
+++ b/doc_theme/static/images/PP_w.png
--- a/doc_theme/static/js/paddle_doc_init.js
+++ b/doc_theme/static/js/paddle_doc_init.js
+$(document).ready(function(){
+    $('.local-toc').on('click' ,'a.reference.internal', function (){
+        $('.local-toc li.active').removeClass('active');
+        $(this).parent('li').addClass('active');
+    });
+    if ($('.local-toc a:visible').length) {
+        $('.local-toc > ul').addClass('nav nav-stacked');
+        $('#doc-content').scrollspy({
+            target: '.local-toc'
+        });
+		$('.local-toc').perfectScrollbar();
+    } else {
+		$('.doc-content-wrap').css('margin-left', '-=50px');
+        $('.local-toc').remove();
+    }
+    if (!$('.doc-menu-vertical > ul > li.current > ul').length) {
+        $('.doc-content-wrap').css('margin-left', '-=240px');
+        $('.doc-menu-vertical').remove();
+        $('.local-toc').css('left', '0');
+    }
+	$('.doc-menu-vertical .toctree-l2').each(function (i, e){
+        $(e).toggleClass('has-child', !!$(e).find('ul').length);
+    });
+    $('.doc-menu-vertical').find('li.current').last().addClass('active');
+    $('.doc-menu-vertical').perfectScrollbar();
+});
\ No newline at end of file
--- a/doc_theme/templates/breadcrumbs.html
+++ b/doc_theme/templates/breadcrumbs.html
+{# Support for Sphinx 1.3+ page_source_suffix, but don't break old builds. #}
+{% if page_source_suffix %} 
+{% set suffix = page_source_suffix %}
+{% else %}
+{% set suffix = source_suffix %}
+{% endif %}
+{% if meta is defined and 'github_url' in meta %}
+{% set display_github = True %}
+{% endif %}
+{% if meta is defined and 'bitbucket_url' in meta %}
+{% set display_bitbucket = True %}
+{% endif %}
+<div role="navigation" aria-label="breadcrumbs navigation">
+  <ul class="wy-breadcrumbs">
+      {% for doc in parents %}
+        <li><a href="{{ doc.link|e }}">{{ doc.title }}</a> > </li>
+      {% endfor %}
+    <li>{{ title }}</li>
+  </ul>
+</div>
--- a/doc_theme/templates/layout.html
+++ b/doc_theme/templates/layout.html
+{# TEMPLATE VAR SETTINGS #}
+{%- set url_root = pathto('', 1) %}
+{%- if url_root == '#' %}{% set url_root = '' %}{% endif %}
+{%- if not embedded and docstitle %}
+  {%- set titlesuffix = " &mdash; "|safe + docstitle|e %}
+{%- else %}
+  {%- set titlesuffix = "" %}
+{%- endif %}
+<!DOCTYPE html>
+<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
+<head>
+  <meta charset="utf-8">
+  {{ metatags }}
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  {% block htmltitle %}
+  <title>{{ title|striptags|e }}{{ titlesuffix }}</title>
+  {% endblock %}
+  {# FAVICON #}
+  {% if favicon %}
+    <link rel="shortcut icon" href="{{ pathto('_static/' + favicon, 1) }}"/>
+  {% endif %}
+  {# CSS #}
+  {# OPENSEARCH #}
+  {% if not embedded %}
+    {% if use_opensearch %}
+      <link rel="search" type="application/opensearchdescription+xml" title="{% trans docstitle=docstitle|e %}Search within {{ docstitle }}{% endtrans %}" href="{{ pathto('_static/opensearch.xml', 1) }}"/>
+    {% endif %}
+  {% endif %}
+  {# RTD hosts this file, so just load on non RTD builds #}
+  {% if not READTHEDOCS %}
+    <link rel="stylesheet" href="{{ pathto('_static/' + style, 1) }}" type="text/css" />
+  {% endif %}
+  {% for cssfile in css_files %}
+    <link rel="stylesheet" href="{{ pathto(cssfile, 1) }}" type="text/css" />
+  {% endfor %}
+  {% for cssfile in extra_css_files %}
+    <link rel="stylesheet" href="{{ pathto(cssfile, 1) }}" type="text/css" />
+  {% endfor %}
+  {%- block linktags %}
+    {%- if hasdoc('about') %}
+        <link rel="author" title="{{ _('About these documents') }}"
+              href="{{ pathto('about') }}"/>
+    {%- endif %}
+    {%- if hasdoc('genindex') %}
+        <link rel="index" title="{{ _('Index') }}"
+              href="{{ pathto('genindex') }}"/>
+    {%- endif %}
+    {%- if hasdoc('search') %}
+        <link rel="search" title="{{ _('Search') }}" href="{{ pathto('search') }}"/>
+    {%- endif %}
+    {%- if hasdoc('copyright') %}
+        <link rel="copyright" title="{{ _('Copyright') }}" href="{{ pathto('copyright') }}"/>
+    {%- endif %}
+    <link rel="top" title="{{ docstitle|e }}" href="{{ pathto('index') }}"/>
+    {%- if parents %}
+        <link rel="up" title="{{ parents[-1].title|striptags|e }}" href="{{ parents[-1].link|e }}"/>
+    {%- endif %}
+    {%- if next %}
+        <link rel="next" title="{{ next.title|striptags|e }}" href="{{ next.link|e }}"/>
+    {%- endif %}
+    {%- if prev %}
+        <link rel="prev" title="{{ prev.title|striptags|e }}" href="{{ prev.link|e }}"/>
+    {%- endif %}
+  {%- endblock %}
+  {%- block extrahead %} 
+  <link rel="stylesheet" href="https://cdn.jsdelivr.net/perfect-scrollbar/0.6.14/css/perfect-scrollbar.min.css" type="text/css" />
+  <link rel="stylesheet" href="{{pathto('_static/css/override.css', 1)}}" type="text/css" />
+  <script>
+  var _hmt = _hmt || [];
+  (function() {
+    var hm = document.createElement("script");
+    hm.src = "//hm.baidu.com/hm.js?b9a314ab40d04d805655aab1deee08ba";
+    var s = document.getElementsByTagName("script")[0]; 
+    s.parentNode.insertBefore(hm, s);
+  })();
+  </script>
+  {% endblock %}
+  {# Keep modernizr in head - http://modernizr.com/docs/#installing #}
+  <script src="{{ pathto('_static/js/modernizr.min.js', 1) }}"></script>
+</head>
+<body class="wy-body-for-nav" role="document">
+  {% block extrabody %}
+  <header class="site-header">
+    <div class="site-logo">
+      <a href="/"><img src="{{pathto('_static/images/PP_w.png', 1)}}"></a>
+    </div>
+    <div class="site-nav-links">
+      <div class="site-menu">
+        <a class="fork-on-github" href="https://github.com/PaddlePaddle/Paddle" target="_blank"><i class="fa fa-github"></i>Fork me on GitHub</a>
+        <div class="language-switcher dropdown">
+          <a type="button" data-toggle="dropdown">
+            <span>English</span>
+            <i class="fa fa-angle-up"></i>
+            <i class="fa fa-angle-down"></i>
+          </a>
+          <ul class="dropdown-menu">
+            <li><a href="/doc_cn">中文</a></li>
+            <li><a href="/doc">English</a></li>
+          </ul>
+        </div>
+        <ul class="site-page-links">
+          <li><a>Home</a></li>
+          <li><a>Get Started</a></li>
+          <li class="active"><a>Documentation</a></li>
+          <li><a>About Us</a></li>
+        </ul>
+      </div>
+      <div class="doc-module">
+        {%set modules = toctree(maxdepth=0, collapse=False, titles_only=True)%}
+        {{modules}}
+        {% include "searchbox.html" %}        
+      </div>
+    </div>
+  </header>
+  {% endblock %}
+  <div class="main-content-wrap">
+    {# SIDE NAV, TOGGLES ON MOBILE #}
+    <nav class="doc-menu-vertical" role="navigation">
+        {% block menu %}
+          {% set toctree = toctree(maxdepth=-1, collapse=False,titles_only=True, includehidden=True) %}
+          {{ toctree }}
+        {% endblock %}
+    </nav>
+    {% if toc %}
+    <nav class="local-toc">{{ toc }}</nav>
+    {% endif %}
+    <section class="doc-content-wrap">
+      {% include "breadcrumbs.html" %}
+      {# PAGE CONTENT #}
+      <div class="wy-nav-content" id="doc-content">
+        <div class="rst-content">
+          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
+           <div itemprop="articleBody">
+            {% block body %}{% endblock %}
+           </div>
+          </div>
+          {% include "footer.html" %}
+        </div>
+      </div>
+    </section>
+  </div>
+  {% include "versions.html" %}
+  {% if not embedded %}
+    <script type="text/javascript">
+        var DOCUMENTATION_OPTIONS = {
+            URL_ROOT:'{{ url_root }}',
+            VERSION:'{{ release|e }}',
+            COLLAPSE_INDEX:false,
+            FILE_SUFFIX:'{{ '' if no_search_suffix else file_suffix }}',
+            HAS_SOURCE:  {{ has_source|lower }}
+        };
+    </script>
+    {%- for scriptfile in script_files %}
+      <script type="text/javascript" src="{{ pathto(scriptfile, 1) }}"></script>
+    {%- endfor %}
+  {% endif %}
+  {# RTD hosts this file, so just load on non RTD builds #}
+  {% if not READTHEDOCS %}
+    <script type="text/javascript" src="{{ pathto('_static/js/theme.js', 1) }}"></script>
+  {% endif %}
+  <script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js" integrity="sha384-Tc5IQib027qvyjSMfHjOMaLkfuWVxZxUPnCJA7l2mCWNIpG9mGCD8wGNIcPD7Txa" crossorigin="anonymous"></script>
+  <script src="https://cdn.jsdelivr.net/perfect-scrollbar/0.6.14/js/perfect-scrollbar.jquery.min.js"></script>
+  <script src="{{ pathto('_static/js/paddle_doc_init.js', 1) }}"></script>
+  {%- block footer %} {% endblock %}
+</body>
+</html>
--- a/doc_theme/templates/search.html
+++ b/doc_theme/templates/search.html
+{#
+    basic/search.html
+    ~~~~~~~~~~~~~~~~~
+    Template for the search page.
+    :copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+#}
+{%- extends "layout.html" %}
+{% set title = _('Search') %}
+{% set script_files = script_files + ['_static/searchtools.js'] %}
+{% block footer %}
+  <script type="text/javascript">
+    jQuery(function() { Search.loadIndex("{{ pathto('searchindex.js', 1) }}"); });
+    jQuery('.doc-content-wrap > div[role="navigation"]').remove();
+    jQuery('.doc-content-wrap').css('padding-top', 0);
+  </script>
+  {# this is used when loading the search index using $.ajax fails,
+     such as on Chrome for documents on localhost #}
+  <script type="text/javascript" id="searchindexloader"></script>
+  {{ super() }}
+{% endblock %}
+{% block body %}
+  <noscript>
+  <div id="fallback" class="admonition warning">
+    <p class="last">
+      {% trans %}Please activate JavaScript to enable the search
+      functionality.{% endtrans %}
+    </p>
+  </div>
+  </noscript>
+  {% if search_performed %}
+    <h2>{{ _('Search Results') }}</h2>
+    {% if not search_results %}
+      <p>{{ _('Your search did not match any documents. Please make sure that all words are spelled correctly and that you\'ve selected enough categories.') }}</p>
+    {% endif %}
+  {% endif %}
+  <div id="search-results">
+  {% if search_results %}
+    <ul>
+    {% for href, caption, context in search_results %}
+      <li>
+        <a href="{{ pathto(item.href) }}">{{ caption }}</a>
+        <p class="context">{{ context|e }}</p>
+      </li>
+    {% endfor %}
+    </ul>
+  {% endif %}
+  </div>
+{% endblock %}
--- a/paddle/scripts/tools/build_docs/Dockerfile
+++ b/paddle/scripts/tools/build_docs/Dockerfile
 FROM paddledev/paddle:cpu-devel-latest
 COPY build.sh /
 RUN pip install sphinx &&\
+    pip install sphinx_rtd_theme &&\
    apt install -y doxygen graphviz &&\
    pip install breathe recommonmark numpy protobuf==2.6.1
 CMD /build.sh
--- a/paddle/trainer/Tester.cpp
+++ b/paddle/trainer/Tester.cpp
@@ -87,10 +87,8 @@ void Tester::testOneDataBatch(const DataBatch& dataBatch,
 void Tester::testOnePeriod() {
  DataBatch dataBatch;
  int64_t batchSize = config_->getOptConfig().batch_size();
-  bool testAllData =
-      intconfig_->testPeriod == 0 || intconfig_->testAllDataInOnePeriod;
+  int batches = std::numeric_limits<int>::max();
-  int batches =
-      testAllData ? std::numeric_limits<int>::max() : intconfig_->testPeriod;
  std::vector<Argument> outArgs;
@@ -102,11 +100,7 @@ void Tester::testOnePeriod() {
      if (intconfig_->prevBatchState) {
        gradientMachine_->resetState();
      }
-      if (testAllData) {
+      break;
-        break;
-      } else {
-        num = testDataProvider_->getNextBatch(batchSize, &dataBatch);
-      }
    }
    testOneDataBatch(dataBatch, &outArgs);
  }

--- a/paddle/trainer/TesterConfig.h
+++ b/paddle/trainer/TesterConfig.h
@@ -39,11 +39,6 @@ struct TesterConfig {
   */
  int testPeriod;
-  /**
-   * indicate whether testing data in one period
-   */
-  bool testAllDataInOnePeriod;
  /**
   * indicate whether to save previous batch state
   */

--- a/paddle/trainer/Trainer.cpp
+++ b/paddle/trainer/Trainer.cpp
@@ -39,20 +39,16 @@ limitations under the License. */
 #include "TrainerConfigHelper.h"
 P_DEFINE_string(config, "", "Trainer config file");
-P_DEFINE_int32(test_period,
-               0,
-               "Run test every so many train batches."
-               " 0 for testing after each pass."
-               " If not 0, test log_period batches."
-               " If 0, test on all test data");
-P_DEFINE_bool(local, true, "Train in local mode or not");
+P_DEFINE_int32(test_period, 0,
+               "if equal 0, do test on all test data at the end of "
+               "each pass. While if equal non-zero, do test on all test "
+               "data every test_period batches");
+P_DEFINE_bool(test_all_data_in_one_period, false,
+               "This option was deprecated, since we will always do "
+               "test on all test set ");
-P_DEFINE_bool(
+P_DEFINE_bool(local, true, "Train in local mode or not");
-    test_all_data_in_one_period,
-    false,
-    "true will test all data in one test peroid."
-    "Otherwise test (batch_size * log_peroid) data in one test period.");
 P_DEFINE_int32(average_test_period,
               0,
@@ -633,8 +629,19 @@ void Trainer::test() { tester_->test(); }
 std::unique_ptr<TesterConfig> Trainer::createTesterConfig() {
  TesterConfig* conf = new TesterConfig;
+  if (FLAGS_test_period) {
+    LOG(WARNING)
+      << "The meaning of --test_period is changed: "
+      << "if equal 0, do test on all test data at the end of "
+      << "each pass. While if equal non-zero, do test on all test "
+      << "data every test_period batches ";
+  }
+  if (FLAGS_test_all_data_in_one_period) {
+    LOG(WARNING)
+      << "--test_all_data_in_one_period was deprecated, since "
+      << "we will always do test on all test set ";
+  }
  conf->testPeriod = FLAGS_test_period;
-  conf->testAllDataInOnePeriod = FLAGS_test_all_data_in_one_period;
  conf->prevBatchState = FLAGS_prev_batch_state;
  conf->logPeriod = FLAGS_log_period;
  conf->loadsaveParametersInPserver = FLAGS_loadsave_parameters_in_pserver;

--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -3377,7 +3377,20 @@ def parse_config(config_file, config_arg_str):
    g_root_submodel.is_recurrent_layer_group = False
    g_current_submodel = g_root_submodel
-    execfile(config_file, make_config_environment(config_file, config_args))
+    # To support Paddle on Spark, config_file may be a callable instead of
+    # a file path. Usage example:
+    #
+    # from paddle.trainer.config_parser import parse_config
+    # def configs():
+    #    # your paddle config code, same as the content of a config file.
+    #
+    # config = parse_config(configs, "is_predict=1")
+    # # You then get the config proto object.
+    if hasattr(config_file, '__call__'):
+      config_file.func_globals.update(make_config_environment("", config_args))
+      config_file()
+    else:
+      execfile(config_file, make_config_environment(config_file, config_args))
    for k, v in settings.iteritems():
        if v is None:
            continue