diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh
index 4b445b37e1eab95947255342bf35bac685ca961a..fadd9c280b0056849030dcbc26d289a5f92d91c9 100755
--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -967,6 +967,18 @@ function build_document_preview() {
 }
 
+# Install the freshly built wheel and run the sample-code checker
+# (sampcd_processor.py); fail the CI step if any sample code fails.
+function example() {
+    pip install /paddle/build/python/dist/paddlepaddle-0.10.0-cp27-cp27mu-linux_x86_64.whl
+    paddle version
+    cd ${PADDLE_ROOT}/python/paddle/fluid
+    python sampcd_processor.py
+    if [ "$?" != "0" ];then
+        echo "Code instance execution failed"
+        exit 1
+    fi
+}
+
 function main() {
     local CMD=$1
     local parallel_number=$2
@@ -981,6 +993,7 @@ function main() {
         build ${parallel_number}
         assert_api_not_changed ${PYTHON_ABI:-""}
         assert_api_spec_approvals
+        example
         ;;
     build)
         cmake_gen ${PYTHON_ABI:-""}
@@ -1073,6 +1086,9 @@ function main() {
         build ${parallel_number}
         build_document_preview
         ;;
+    api_example)
+        example
+        ;;
     *)
         print_usage
         exit 1
diff --git a/python/paddle/fluid/sampcd_processor.py b/python/paddle/fluid/sampcd_processor.py
new file mode 100644
index 0000000000000000000000000000000000000000..c22e6473b8859222e32a517e38c1dbfbde8f689d
--- /dev/null
+++ b/python/paddle/fluid/sampcd_processor.py
@@ -0,0 +1,561 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Jun 14 14:10:36 2019
+
+@author: haowang101779990
+"""
+"""
+This script is for scraping and executing sample codes in the
+comments of paddle .py source file in order to validate the
+sample codes.
+
+Put this script at directory fluid/
+
+log July 4 : CPU is implemented, wlist is added,
+transpiler module needs to be finished
+
+"""
+
+import os
+import subprocess
+
+
+def find_all(srcstr, substr):
+    # Return the indices of every occurrence of substr in srcstr.
+    indices = []
+    gotone = srcstr.find(substr)
+    while (gotone != -1):
+        indices.append(gotone)
+        gotone = srcstr.find(substr, gotone + 1)
+    return indices
+
+
+def check_indent(cdline):
+    # Count the leading indentation of a code line; a tab counts as 4 spaces.
+    indent = 0
+    for c in cdline:
+        if c == '\t':
+            indent += 4
+        elif c == ' ':
+            indent += 1
+        if c != ' ' and c != '\t':
+            break
+    return indent
+
+
+#srccom: raw comments in the source, including ''' and original indent
+def sampcd_extract_and_run(srccom, name, logf):
+    sampcd_begins = find_all(srccom, ".. code-block:: python")
+    #no sample code
+    #have sample code but not formatted by code block
+
+    status = []
+    '''
+    status:
+
+    3:error sample code
+    2:have sample code but format is wrong
+    1:no sample code
+    0:successful
+    -1:no comments found
+    -2:in white list
+    there may be several examples in a source comment
+    so status is a list to contain the states
+    '''
+
+    if (len(sampcd_begins) == 0):
+        if (srccom.find("Examples:") != -1):
+            print "----example code check----\n"
+            logf.write("\n----example code check----\n")
+            if (srccom.find(">>>") != -1):
+                logf.write(
+                    "Deprecated sample code style:\n\n Examples:\n\n >>>codeline\n >>>codeline\n\n\n "
+                    + "Please use '.. code-block:: python' to " +
+                    "format sample code.\n")
+                print(
+                    "Deprecated sample code style:\n\n Examples:\n\n >>>codeline\n >>>codeline\n\n\n "
+                    + "Please use '.. code-block:: python' to " +
+                    "format sample code.\n")
+                status.append(2)
+        else:
+            print "No sample code!\n"
+            logf.write("\nNo sample code!\n")
+            status.append(1)
+
+    for y in range(1, len(sampcd_begins) + 1):
+        sampcd_begin = sampcd_begins[y - 1]
+        sampcd = srccom[sampcd_begin + len(".. code-block:: python") + 1:]
+        sampcd = sampcd.split("\n")
+        #remove starting empty lines
+        while sampcd[0].replace(' ', '').replace('\t', '') == '':
+            sampcd.pop(0)
+        min_indent = check_indent(sampcd[0])
+        sampcd_to_write = []
+        for i in range(0, len(sampcd)):
+            cdline = sampcd[i]
+            #handle empty lines or those only with spaces/tabs
+            if cdline.strip() == '':
+                continue
+            this_indent = check_indent(cdline)
+            if (this_indent < min_indent):
+                break
+            else:
+                cdline = cdline.replace('\t', '    ')
+                sampcd_to_write.append(cdline[min_indent:])
+        sampcd = '\n'.join(sampcd_to_write)
+        sampcd = '\nimport os\n' + 'os.environ["CUDA_VISIBLE_DEVICES"] = ""\n' + sampcd
+        sampcd += '\nprint ' + '\"' + name + ' sample code is executed successfully!\"\n'
+
+        print "\n"
+        print "Sample code " + str(y) + " extracted for " + name + " :"
+        print "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
+        print(sampcd)
+
+        logf.write("\nSample code extracted for " + name + " :\n")
+        logf.write("\n" + sampcd + "\n")
+
+        print "----example code check----\n"
+        print "executing sample code ....."
+
+        logf.write("\n----example code check----\n")
+        logf.write("\nexecuting sample code .....\n")
+
+        if (len(sampcd_begins) > 1):
+            tfname = name + "_example_" + str(y) + ".py"
+        else:
+            tfname = name + "_example" + ".py"
+
+        tempf = open("samplecode_temp/" + tfname, 'w')
+        tempf.write(sampcd)
+        tempf.close()
+        cmd = ["python", "samplecode_temp/" + tfname]
+        subprc = subprocess.Popen(
+            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        output = subprc.communicate()
+        print "execution result:"
+        logf.write("\nexecution result:\n")
+        msg = "\n".join(output)
+
+        if (msg.find("sample code is executed successfully!") == -1):
+            print("Error Raised from Sample Code " + name + " :\n")
+            logf.write("\nError Raised from Sample Code " + name + " :\n")
+            status.append(3)
+        else:
+            status.append(0)
+
+        #msg is the returned code execution report
+        print msg
+        logf.write("\n" + msg + "\n")
+        os.remove("samplecode_temp/" + tfname)
+
+    print status
+    logf.write("\n" + "execution status" + str(status) + "\n")
+    return status
+
+
+'''
+to extract a def function/class comments body
+start_from: the line num of "def" header
+'''
+
+
+def single_defcom_extract(start_from, srcls, is_class_begin=False):
+    i = start_from
+    fcombody = ""  #def comment body
+    comstart = -1
+    comstyle = 0
+
+    for x in range(i + 1, len(srcls)):
+        if is_class_begin:
+            if (srcls[x].startswith('    def ')):
+                break
+        if ((srcls[x].startswith('def ') or srcls[x].startswith('class '))):
+            break
+        else:
+            if (comstart == -1 and srcls[x].replace(" ", '').replace(
+                    "\t", '').replace("\n", '').startswith("\"\"\"")):
+                comstart = x
+                comstyle = 2
+                continue
+            if (comstyle == 2 and comstart != -1 and
+                    srcls[x].replace(" ", '').replace("\t", '').replace(
+                        "\n", '').startswith("\"\"\"")):
+                break
+            if (comstart == -1 and srcls[x].replace(" ", '').replace(
+                    "\t", '').replace("\n", '').startswith("\'\'\'")):
+                comstart = x
+                comstyle = 1
+                continue
+            if (comstyle == 1 and comstart != -1 and
+                    srcls[x].replace(" ", '').replace("\t", '').replace(
+                        "\n", '').startswith("\'\'\'")):
+                break
+            if (comstart !=
+                    -1):  #when the comments start, begin to add line to fcombody
+                fcombody += srcls[x]
+    return fcombody
+
+
+def print_header(logf, htype, name):
+    print "\n"
+    print htype + " name:" + name
+    print "-----------------------"
+    logf.write("\n\n" + htype + " name:" + name + "\n")
+    logf.write("-----------------------\n")
+
+
+def srccoms_extract(srcfile, logf, status_all, wlist):
+    print "source file name:" + srcfile.name
+    print "---------------------------------------------------"
+
+    logf.write("source file name:" + srcfile.name + "\n")
+    logf.write("---------------------------------------------------\n\n")
+
+    srcc = srcfile.read()
+
+    #2. get defs and classes header line number
+    #set file pointer to its beginning
+    srcfile.seek(0, 0)
+    srcls = srcfile.readlines()  #source lines
+
+    #1. fetch __all__ list
+    allidx = srcc.find("__all__")
+
+    if (allidx != -1):
+        alllist = []
+        if (srcfile.name.find("ops.py") != -1):
+            for ai in range(0, len(srcls)):
+                if (srcls[ai].startswith("__all__")):
+                    lb = srcls[ai].find('[')
+                    rb = srcls[ai].find(']')
+                    if (lb == -1):
+                        continue
+                    allele = srcls[ai][lb + 1:rb].replace("'", '').replace(
+                        " ", '').replace("\"", '')
+                    alllist.append(allele)
+            alllist.remove('')
+        else:
+            alllist_b = allidx + len("__all__")
+            allstr = srcc[alllist_b + srcc[alllist_b:].find("[") + 1:alllist_b +
+                          srcc[alllist_b:].find("]")]
+            allstr = allstr.replace("\n", '').replace(" ", '').replace(
+                "'", '').replace("\"", '')
+            alllist = allstr.split(',')
+            if '' in alllist:
+                alllist.remove('')
+        print "__all__:" + str(alllist) + "\n"
+        logf.write("__all__:" + str(alllist) + "\n\n")
+        api_alllist_count = len(alllist)
+        api_count = 0
+        handled = []
+        if (srcfile.name.find("ops.py") != -1):
+            for i in range(0, len(srcls)):
+                if srcls[i].find("__doc__") != -1:
+                    opname = srcls[i][:srcls[i].find("__doc__") - 1]
+                    print_header(logf, "def", opname)
+                    if opname in wlist:
+                        print opname + " is in white list, thus skipped"
+                        logf.write("\n" + opname +
+                                   " is in white list, thus skipped\n")
+                        status_all[opname] = [-2]
+                        print status_all[opname]
+                        logf.write("\n" + "execution status" + str(status_all[
+                            opname]) + "\n")
+                        continue
+                    comstart = i
+                    for j in range(i, len(srcls)):
+                        if (srcls[j].find("\"\"\"") != -1):
+                            comstart = i
+                    opcom = ""
+                    for j in range(comstart + 1, len(srcls)):
+                        opcom += srcls[j]
+                        if (srcls[j].find("\"\"\"") != -1):
+                            break
+                    if opname in wlist:
+                        print opname + " is in white list, thus skipped"
+                        logf.write("\n" + opname +
+                                   " is in white list, thus skipped\n")
+                        status_all[opname] = [-2]
+                        print status_all[opname]
+                        logf.write("\n" + "execution status" + str(status_all[
+                            opname]) + "\n")
+                        continue
+                    status = sampcd_extract_and_run(opcom, opname, logf)
+                    api_count += 1
+                    status_all[opname] = status
+                    handled.append(opname)
+
+        for i in range(0, len(srcls)):
+            if srcls[i].startswith('def '):
+                f_header = srcls[i].replace(" ", '')
+                fn = f_header[len('def'):f_header.find('(')]  #function name
+                if fn in handled:
+                    continue
+                print_header(logf, "def", fn)
+                if fn in alllist:
+                    api_count += 1
+                    if fn in wlist:
+                        print fn + " is in white list, thus skipped"
+                        logf.write("\n" + fn +
+                                   " is in white list, thus skipped\n")
+                        status_all[fn] = [-2]
+                        print status_all[fn]
+                        logf.write("\n" + "execution status" + str(status_all[
+                            fn]) + "\n")
+                        continue
+                    fcombody = single_defcom_extract(i, srcls)
+                    if (fcombody == ""):
+                        print "no comments in function " + fn
+                        logf.write("no comments in function " + fn + "\n\n")
+                        status_all[fn] = [-1]
+                        print status_all[fn]
+                        logf.write("\n" + "execution status" + str(status_all[
+                            fn]) + "\n")
+                        continue
+                    else:
+                        status = sampcd_extract_and_run(fcombody, fn, logf)
+                        status_all[fn] = status
+                else:
+                    print fn + " not in __all__ list"
+                    logf.write(fn + " not in __all__ list\n\n")
+            if srcls[i].startswith('class '):
+                print srcls[i]
+                c_header = srcls[i].replace(" ", '')
+                cn = c_header[len('class'):c_header.find('(')]  #class name
+                if cn in handled:
+                    continue
+                print_header(logf, "class", cn)
+                if cn in alllist:
+                    api_count += 1
+                    if cn in wlist:
+                        print cn + " is in white list, thus skipped"
+                        logf.write("\n" + cn +
+                                   " is in white list, thus skipped\n")
+                        status_all[cn] = [-2]
+                        print status_all[cn]
+                        logf.write("\n" + "execution status" + str(status_all[
+                            cn]) + "\n")
+                        continue
+                    allcoms = []
+                    classcom = single_defcom_extract(i, srcls, True)
+                    allcoms.append(classcom)
+                    if (classcom != ""):
+                        status = sampcd_extract_and_run(classcom, cn, logf)
+                        status_all[cn] = status
+                    else:
+                        print "no comments in class itself " + cn + "\n"
+                        logf.write("no comments in class itself " + cn +
+                                   "\n\n\n")
+                        status_all[cn] = [-1]
+                        print status_all[cn]
+                        logf.write("\n" + "execution status" + str(status_all[
+                            cn]) + "\n")
+                    for x in range(
+                            i + 1,
+                            len(srcls)):  #from the next line of class header
+                        if (srcls[x].startswith('def ') or
+                                srcls[x].startswith('class ')):
+                            break
+                        else:
+                            if (srcls[x].startswith(
+                                    '    def ')):  #detect a method header..
+                                thisl = srcls[x]
+                                indent = len(thisl) - len(thisl.lstrip())
+                                mn = thisl[indent + len('def '):thisl.find(
+                                    '(')]  #method name
+                                name = cn + "." + mn
+                                print_header(logf, "method", name)
+                                if mn.startswith('_'):
+                                    print mn + " is hidden, not visible to users"
+                                    logf.write(
+                                        "\n" + mn +
+                                        " is hidden, not visible to users\n")
+                                    continue
+                                if name in wlist:
+                                    print name + " is in white list, thus skipped"
+                                    logf.write(
+                                        "\n" + name +
+                                        " is in white list, thus skipped\n")
+                                    status_all[name] = [-2]
+                                    print status_all[name]
+                                    logf.write("\n" + "execution status" + str(
+                                        status_all[name]) + "\n")
+                                    continue
+                                thismethod = []
+                                thismtdstr = ""
+                                thismethod.append(thisl[indent:])
+                                thismtdstr += thisl[indent:]
+                                for y in range(x + 1, len(srcls)):
+                                    if (srcls[y].startswith('def ') or
+                                            srcls[y].startswith('class ')):
+                                        break
+                                    elif (srcls[y].lstrip().startswith('def ')):
+                                        break
+                                    else:
+                                        thismethod.append(srcls[y][indent:])
+                                        thismtdstr += srcls[y][indent:]
+                                thismtdcom = single_defcom_extract(0,
+                                                                   thismethod)
+                                allcoms.append(thismtdcom)
+                                if (thismtdcom != ""):
+                                    status = sampcd_extract_and_run(thismtdcom,
+                                                                    name, logf)
+                                    status_all[name] = status
+                                else:
+                                    print "no comments in method " + name + "\n"
+                                    logf.write("no comments in method " + name +
+                                               "\n\n\n")
+                                    status_all[name] = [-1]
+                                    print status_all[name]
+                                    logf.write("\n" + "execution status" + str(
+                                        status_all[name]) + "\n")
+                else:
+                    print cn + " is not in __all__ list"
+                    logf.write(cn + " is not in __all__ list\n\n")
+    return [
+        srcfile.name + " all list length: " + str(api_alllist_count),
+        "analysed api count: " + str(api_count)
+    ]
+
+
+filenames = [
+    "layers/control_flow.py", "layers/io.py", "layers/nn.py", "layers/ops.py",
+    "layers/tensor.py", "layers/learning_rate_scheduler.py",
+    "layers/detection.py", "layers/metric_op.py"
+]
+filenames += [
+    "dygraph/layers.py", "dygraph/base.py", "dygraph/nn.py",
+    "dygraph/tracer.py", "dygraph/profiler.py", "dygraph/parallel.py",
+    "dygraph/checkpoint.py", "dygraph/learning_rate_scheduler.py",
+    "dygraph/backward_strategy.py"
+]
+
+filenames += [
+    "data_feeder.py", "dataset.py", "clip.py", "metrics.py", "executor.py",
+    "initializer.py", "io.py", "nets.py", "optimizer.py", "profiler.py",
+    "regularizer.py", "backward.py", "average.py", "profiler.py",
+    "unique_name.py"
+]
+
+wlist_inneed = [
+    "append_LARS", "BuildStrategy.debug_graphviz_path",
+    "BuildStrategy.enable_sequential_execution",
+    "BuildStrategy.fuse_elewise_add_act_ops",
+    "BuildStrategy.fuse_relu_depthwise_conv",
+    "BuildStrategy.gradient_scale_strategy", "BuildStrategy.reduce_strategy",
+    "BuildStrategy.remove_unnecessary_lock", "BuildStrategy.sync_batch_norm",
+    "DynamicRNN.step_input", "DynamicRNN.static_input", "DynamicRNN.block",
+    "DynamicRNN.update_memory", "DynamicRNN.output",
+    "transpiler.DistributeTranspilerConfig",
+    "transpiler.DistributeTranspilerConfig.slice_var_up",
+    "transpiler.DistributeTranspilerConfig.split_method",
+    "transpiler.DistributeTranspilerConfig.min_block_size",
+    "DistributeTranspilerConfig.slice_var_up",
+    "DistributeTranspilerConfig.split_method", "ModelAverage.apply",
+    "ModelAverage.restore", "DistributeTranspilerConfig",
+    "DistributeTranspilerConfig.min_block_size",
+    "ExecutionStrategy.allow_op_delay", "load", "Accuracy.update",
+    "ChunkEvaluator.update", "ExecutionStrategy.num_iteration_per_drop_scope",
+    "ExecutionStrategy.num_threads", "CompiledProgram.with_inference_optimize",
+    "CompositeMetric.add_metric", "CompositeMetric.update",
+    "CompositeMetric.eval", "DetectionMAP.get_map_var", "MetricBase",
+    "MetricBase.reset", "MetricBase.get_config", "MetricBase.update",
+    "MetricBase.eval", "Accuracy.eval", "Auc.update", "Auc.eval",
+    "EditDistance.update", "EditDistance.eval",
+    "ExponentialMovingAverage.apply", "ExponentialMovingAverage.restore",
+    "ExponentialMovingAverage.update", "StaticRNN.step", "StaticRNN.step_input",
+    "StaticRNN.step_output", "StaticRNN.update_memory", "DetectionMAP.reset",
+    'StaticRNN.output'
+]
+
+wlist_temp = [
+    'elementwise_floordiv', 'Layer', 'Layer.create_parameter',
+    'Layer.create_variable', 'Layer.sublayers', 'Layer.add_parameter',
+    'Layer.add_sublayer', 'Layer.parameters', 'Tracer', 'Layer.full_name',
+    'InMemoryDataset', 'layer_norm', 'bipartite_match', 'double_buffer',
+    'cumsum', 'thresholded_relu', 'group_norm', 'random_crop', 'py_func',
+    'row_conv', 'hard_shrink', 'ssd_loss', 'retinanet_target_assign',
+    'InMemoryDataset.global_shuffle', 'InMemoryDataset.get_memory_data_size',
+    'DetectionMAP', 'hash', 'InMemoryDataset.set_queue_num', 'LayerNorm',
+    'Preprocessor', 'chunk_eval', 'GRUUnit', 'ExponentialMovingAverage',
+    'QueueDataset.global_shuffle', 'NumpyArrayInitializer',
+    'create_py_reader_by_data', 'InMemoryDataset.local_shuffle',
+    'InMemoryDataset.get_shuffle_data_size', 'size', 'edit_distance', 'nce',
+    'BilinearInitializer', 'NaturalExpDecay', 'noam_decay',
+    'retinanet_detection_output', 'Pool2D', 'PipelineOptimizer',
+    'generate_mask_labels', 'isfinite',
+    'InMemoryDataset.set_fleet_send_batch_size', 'cuda_profiler', 'unfold',
+    'Executor', 'InMemoryDataset.load_into_memory', 'ExponentialDecay',
+    'BatchNorm', 'deformable_conv', 'InMemoryDataset.preload_into_memory',
+    'py_reader', 'linear_lr_warmup', 'InMemoryDataset.wait_preload_done',
+    'CosineDecay', 'roi_perspective_transform', 'unique', 'ones_like',
+    'LambOptimizer', 'InMemoryDataset.release_memory', 'Conv2DTranspose',
+    'QueueDataset.local_shuffle'
+]
+'''
+white list of private API/ redundant API
+'''
+wlist_ignore = [
+    'elementwise_pow', 'WeightedAverage.reset', 'ChunkEvaluator.eval',
+    'NCE.forward', 'elementwise_div', 'BilinearTensorProduct.forward',
+    'NoamDecay.step', 'elementwise_min', 'PiecewiseDecay.step',
+    'Conv3DTranspose.forward', 'elementwise_add', 'IfElse.output',
+    'IfElse.true_block', 'InverseTimeDecay.step', 'PolynomialDecay.step',
+    'Precision.eval', 'enabled', 'elementwise_max', 'stop_gperf_profiler',
+    'IfElse.false_block', 'WeightedAverage.add', 'Auc.trapezoid_area',
+    'elementwise_mul', 'GroupNorm.forward', 'SpectralNorm.forward',
+    'elementwise_sub', 'Switch.case', 'IfElse.input', 'prepare_context',
+    'PRelu.forward', 'Recall.update', 'start_gperf_profiler',
+    'TreeConv.forward', 'Conv2D.forward', 'Switch.default', 'elementwise_mod',
+    'Precision.update', 'WeightedAverage.eval', 'Conv3D.forward',
+    'Embedding.forward', 'Recall.eval', 'FC.forward', 'While.block'
+]
+
+wlist = wlist_temp + wlist_inneed + wlist_ignore
+status_all = {}
+logf = open("log.txt", 'w')
+statusf = open("status.txt", 'w')
+
+if not os.path.isdir("./samplecode_temp"):
+    os.mkdir("./samplecode_temp")
+for filename in filenames:
+    srcfile = open(filename, 'r')
+    counts = srccoms_extract(srcfile, logf, status_all, wlist)
+    logf.write("\n\n" + str(counts) + "\n\n")
+    srcfile.close()
+for root, dirs, files in os.walk("./samplecode_temp"):
+    for fntemp in files:
+        os.remove("./samplecode_temp/" + fntemp)
+
+os.rmdir("./samplecode_temp")
+statusf.write("status_all:\n" + str(status_all))
+status_groups = {-2: [], -1: [], 0: [], 1: [], 2: [], 3: []}
+ci_pass = True
+
+for key in status_all:
+    statusl = status_all[key]
+    for ele in statusl:
+        if (ele != 0 and ele != -2):
+            ci_pass = False
+            break
+    if len(statusl) == 1:
+        status_groups[statusl[0]].append(key)
+    else:
+        for u in range(0, len(statusl)):
+            status_groups[statusl[u]].append(key + '_' + str(u + 1))
+
+statusf.write('\n\ngrouped apis:\n' + str(status_groups) + '\n')
+statusf.close()
+logf.close()
+
+temp_wlistf = open("tempwlist.txt", 'w')
+wlist_temp = status_groups[1] + status_groups[2] + status_groups[
+    3] + status_groups[-1]
+temp_wlistf.write(str(wlist_temp))
+temp_wlistf.close()
+print str(wlist_temp)
+
+if not ci_pass:
+    print "Mistakes found in sample codes, refer to the log for details"
+    exit(1)
+else:
+    print "Sample code check is successful!"